diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..558b67f89 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# https://tools.ietf.org/html/rfc5545#section-3.1 +*.ics text eol=crlf diff --git a/.mailmap b/.mailmap new file mode 100644 index 000000000..9b63f5895 --- /dev/null +++ b/.mailmap @@ -0,0 +1,20 @@ +Aleksa Sarai +Alexander Morozov +Amit Saha +Antonio Murdaca +Brandon Philips +Brandon Philips +ChengTiesheng +Daniel, Dao Quang Minh +Doug Davis +John Howard +LinZhinan(Zen Lin) +Mrunal Patel +Mrunal Patel +Mrunal Patel +Vincent Batts +Vincent Batts +Vishnu Kannan +Vishnu Kannan +Zefan Li +梁辰晔 (Liang Chenye) diff --git a/.travis.yml b/.travis.yml index 7f6c11f11..7c2de7e71 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,3 +22,4 @@ script: - echo "${TRAVIS_COMMIT_RANGE} -> ${TRAVIS_COMMIT_RANGE/.../..} (travis-ci/travis-ci#4596)" - TRAVIS_COMMIT_RANGE="${TRAVIS_COMMIT_RANGE/.../..}" make .gitvalidation - make docs + - make -C schema test diff --git a/ChangeLog b/ChangeLog index 99ffe630d..e3fae8e81 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,62 @@ OpenContainers Specifications +Changes with v1.0.0-rc5: + + Breaking changes: + + * config: Explicitly require `platform` (#695). + * config: The platform-specific sections (`linux`, `solaris`, and + `windows`) MUST NOT be set unless they match `platform.os` (#673). + * config: `process.capabilities` is now an object instead of an + array of strings (#675). + * config-linux: No longer allow negative values for some resources, + partially reversing #648 from v1.0.0-rc4 (#704). + * config-linux: `linux.seccomp.syscalls` entries have `names` + instead of `name` (#657). + * runtime: Rename the state `bundlePath` property to `bundle` + (#674). + + Additions: + + * config: `process.capabilities` is no longer Linux-only (#673). + * config-linux: `linux.seccomp.syscalls` entries have a new + `comment` property (#657). 
+ * config-linux: Add new architectures from libseccomp 2.3.2 (#705). + * runtime: Add a `creating` state `status` (#507, #694). + + Removals and increased restrictions: + + * runtime: Document hook timing and exit code handling (#532). + * schema/config-linux: Explicit `null` values are no longer + compliant (#662). + + Decreased restrictions: + + * config: `type` and `source` properties are now optional for + `mounts` entries (#699). + * config: `args` property is now optional for hooks (#685). + * config-linux: Runtimes no longer need to provide `/proc` and + other filesystems unless they are explicitly requested in the + configuration JSON (#666). + + Minor fixes and documentation: + + * spec: Add OCI Runtime Abstract (#691). + * config: Document the Go `platform` tag (#570). + * config-linux: Remove local uid/gid mapping limit and punt to the + kernel (#693). + * schema: Fix broken `string` and similar `$ref`s (#684). + * schema: Remove `mounts` from required properties (#696). + * schema: Remove `major` and `minor` from `linux.devices` entries + (#688). + * schema: Check for the required `type`, `hard`, and `soft` in + `process.rlimits` entries (#696). + * schema/validate: Gained usage documentation and fixed + `schemaPath` logic when the argument did not contain `://` (#552). + * *: Add anchor tags to a number of spec locations (#707). + * *: Consistent link syntax (#687). + * *: Minor cleanup and rewording (#697). + Changes with v1.0.0-rc4: Additions: diff --git a/GOVERNANCE.md b/GOVERNANCE.md index e5224fbf3..92c860949 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -14,7 +14,7 @@ A maintainer SHOULD propose a motion on the dev@opencontainers.org mailing list Voting on a proposed motion SHOULD happen on the dev@opencontainers.org mailing list (except [security issues](#security-issues)) with maintainers posting LGTM or REJECT. Maintainers MAY also explicitly not vote by posting ABSTAIN (which is useful to revert a previous vote). 
-Maintainers MAY post multiple times (e.g. as they revise their position based on feeback), but only their final post counts in the tally. +Maintainers MAY post multiple times (e.g. as they revise their position based on feedback), but only their final post counts in the tally. A proposed motion is adopted if two-thirds of votes cast, a quorum having voted, are in favor of the release. Voting SHOULD remain open for a week to collect feedback from the wider community and allow the maintainers to digest the proposed motion. diff --git a/MAINTAINERS b/MAINTAINERS index ac88dd51a..d3fe235d9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1,5 +1,4 @@ Michael Crosby (@crosbymichael) -Alexander Morozov (@LK4D4) Vishnu Kannan (@vishh) Mrunal Patel (@mrunalp) Vincent Batts (@vbatts) diff --git a/Makefile b/Makefile index 1d5401828..770834b7c 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,6 @@ DOC_FILES := \ default: docs -.PHONY: docs docs: $(OUTPUT_DIRNAME)/$(DOC_FILENAME).pdf $(OUTPUT_DIRNAME)/$(DOC_FILENAME).html ifeq "$(strip $(PANDOC))" '' @@ -56,8 +55,6 @@ HOST_GOLANG_VERSION = $(shell go version | cut -d ' ' -f3 | cut -c 3-) # this variable is used like a function. First arg is the minimum version, Second arg is the version to be checked. ALLOWED_GO_VERSION = $(shell test '$(shell /bin/echo -e "$(1)\n$(2)" | sort -V | head -n1)' = '$(1)' && echo 'true') -.PHONY: test .govet .golint .gitvalidation - test: .govet .golint .gitvalidation .govet: @@ -80,8 +77,6 @@ else git-validation -v -run DCO,short-subject,dangling-whitespace -range $(EPOCH_TEST_COMMIT)..HEAD endif - -.PHONY: install.tools install.tools: .install.golint .install.gitvalidation # golint does not even build for Container Format This section defines a format for encoding a container as a *filesystem bundle* - a set of files organized in a certain way, and containing all the necessary data and metadata for any compliant runtime to perform all standard operations against it. 
-See also [OS X application bundles](http://en.wikipedia.org/wiki/Bundle_%28OS_X%29) for a similar use of the term *bundle*. +See also [MacOS application bundles][macos_bundle] for a similar use of the term *bundle*. The definition of a bundle is only concerned with how a container, and its configuration data, are stored on a local filesystem so that it can be consumed by a compliant runtime. A Standard Container bundle contains all the information needed to load and run a container. -This MUST include the following artifacts: +This includes the following artifacts: 1. `config.json`: contains configuration data. -This REQUIRED file MUST reside in the root of the bundle directory and MUST be named `config.json`. -See [`config.json`](config.md) for more details. + This REQUIRED file MUST reside in the root of the bundle directory and MUST be named `config.json`. + See [`config.json`](config.md) for more details. 2. A directory representing the root filesystem of the container. -While the name of this REQUIRED directory may be arbitrary, users should consider using a conventional name, such as `rootfs`. -This directory MUST be referenced from within the `config.json` file. + On Windows, for Windows Server containers, this directory is REQUIRED. + For Hyper-V containers, it MUST be omitted. -While these artifacts MUST all be present in a single directory on the local filesystem, that directory itself is not part of the bundle. + On all other platforms, this field is REQUIRED. + + If set, this directory MUST be referenced by [`root`](config.md#root) within the `config.json` file. + +When supplied, while these artifacts MUST all be present in a single directory on the local filesystem, that directory itself is not part of the bundle. In other words, a tar archive of a *bundle* will have these artifacts at the root of the archive, not nested within a top-level directory. 
+ +[macos_bundle]: https://en.wikipedia.org/wiki/Bundle_%28macOS%29 diff --git a/config-linux.md b/config-linux.md index 404072f7b..62e389d0c 100644 --- a/config-linux.md +++ b/config-linux.md @@ -1,30 +1,30 @@ -# Linux-specific Container Configuration +# Linux Container Configuration This document describes the schema for the [Linux-specific section](config.md#platform-specific-configuration) of the [container configuration](config.md). The Linux container specification uses various kernel features like namespaces, cgroups, capabilities, LSM, and filesystem jails to fulfill the spec. -## Default Filesystems +## Default Filesystems The Linux ABI includes both syscalls and several special file paths. -Applications expecting a Linux environment will very likely expect these file paths to be setup correctly. +Applications expecting a Linux environment will very likely expect these file paths to be set up correctly. The following filesystems SHOULD be made available in each container's filesystem: -| Path | Type | +| Path | Type | | -------- | ------ | -| /proc | [procfs](https://www.kernel.org/doc/Documentation/filesystems/proc.txt) | -| /sys | [sysfs](https://www.kernel.org/doc/Documentation/filesystems/sysfs.txt) | -| /dev/pts | [devpts](https://www.kernel.org/doc/Documentation/filesystems/devpts.txt) | -| /dev/shm | [tmpfs](https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt) | +| /proc | [procfs][] | +| /sys | [sysfs][] | +| /dev/pts | [devpts][] | +| /dev/shm | [tmpfs][] | -## Namespaces +## Namespaces A namespace wraps a global system resource in an abstraction that makes it appear to the processes within the namespace that they have their own isolated instance of the global resource. Changes to the global resource are visible to other processes that are members of the namespace, but are invisible to other processes. -For more information, see [the man page](http://man7.org/linux/man-pages/man7/namespaces.7.html). 
+For more information, see the [namespaces(7)][namespaces.7_2] man page. Namespaces are specified as an array of entries inside the `namespaces` root field. -The following parameters can be specified to setup namespaces: +The following parameters can be specified to set up namespaces: * **`type`** *(string, REQUIRED)* - namespace type. The following namespace types are supported: * **`pid`** processes inside the container will only be able to see other processes inside the same container. @@ -35,13 +35,16 @@ The following parameters can be specified to setup namespaces: * **`user`** the container will be able to remap user and group IDs from the host to local users and groups within the container. * **`cgroup`** the container will have an isolated view of the cgroup hierarchy. -* **`path`** *(string, OPTIONAL)* - path to namespace file in the [runtime mount namespace](glossary.md#runtime-namespace) +* **`path`** *(string, OPTIONAL)* - an absolute path to namespace file in the [runtime mount namespace](glossary.md#runtime-namespace). + The runtime MUST place the container process in the namespace associated with that `path`. + The runtime MUST [generate an error](runtime.md#errors) if `path` is not associated with a namespace of type `type`. + + If `path` is not specified, the runtime MUST create a new [container namespace](glossary.md#container-namespace) of type `type`. -If a path is specified, that particular file is used to join that type of namespace. If a namespace type is not specified in the `namespaces` array, the container MUST inherit the [runtime namespace](glossary.md#runtime-namespace) of that type. -If a `namespaces` field contains duplicated namespaces with same `type`, the runtime MUST error out. +If a `namespaces` field contains duplicated namespaces with same `type`, the runtime MUST [generate an error](runtime.md#errors). 
-###### Example +### Example ```json "namespaces": [ @@ -71,21 +74,21 @@ If a `namespaces` field contains duplicated namespaces with same `type`, the run ] ``` -## User namespace mappings +## User namespace mappings **`uidMappings`** (array of objects, OPTIONAL) describes the user namespace uid mappings from the host to the container. **`gidMappings`** (array of objects, OPTIONAL) describes the user namespace gid mappings from the host to the container. Each entry has the following structure: -* **`hostID`** (uint32, REQUIRED)* - is the starting uid/gid on the host to be mapped to *containerID*. -* **`containerID`** (uint32, REQUIRED)* - is the starting uid/gid in the container. -* **`size`** (uint32, REQUIRED)* - is the number of ids to be mapped. +* **`hostID`** *(uint32, REQUIRED)* - is the starting uid/gid on the host to be mapped to *containerID*. +* **`containerID`** *(uint32, REQUIRED)* - is the starting uid/gid in the container. +* **`size`** *(uint32, REQUIRED)* - is the number of ids to be mapped. The runtime SHOULD NOT modify the ownership of referenced filesystems to realize the mapping. -There is a limit of 5 mappings which is the Linux kernel hard limit. +Note that the number of mapping entries MAY be limited by the [kernel][user-namespaces]. -###### Example +### Example ```json "uidMappings": [ @@ -104,29 +107,29 @@ There is a limit of 5 mappings which is the Linux kernel hard limit. ] ``` -## Devices +## Devices **`devices`** (array of objects, OPTIONAL) lists devices that MUST be available in the container. -The runtime may supply them however it likes (with [mknod][mknod.2], by bind mounting from the runtime mount namespace, etc.). +The runtime MAY supply them however it likes (with [`mknod`][mknod.2], by bind mounting from the runtime mount namespace, etc.). Each entry has the following structure: * **`type`** *(string, REQUIRED)* - type of device: `c`, `b`, `u` or `p`. - More info in [mknod(1)][mknod.1]. + More info in [mknod(1)][mknod.1]. 
* **`path`** *(string, REQUIRED)* - full path to device inside container. - If a [file][file.1] already exists at `path` that does not match the requested device, the runtime MUST generate an error. -* **`major, minor`** *(int64, REQUIRED unless **`type`** is `p`)* - [major, minor numbers][devices] for the device. + If a [file][] already exists at `path` that does not match the requested device, the runtime MUST generate an error. +* **`major, minor`** *(int64, REQUIRED unless `type` is `p`)* - [major, minor numbers][devices] for the device. * **`fileMode`** *(uint32, OPTIONAL)* - file mode for the device. - You can also control access to devices [with cgroups](#device-whitelist). + You can also control access to devices [with cgroups](#device-whitelist). * **`uid`** *(uint32, OPTIONAL)* - id of device owner. * **`gid`** *(uint32, OPTIONAL)* - id of device group. The same `type`, `major` and `minor` SHOULD NOT be used for multiple devices. -###### Example +### Example ```json - "devices": [ + "devices": [ { "path": "/dev/fuse", "type": "c", @@ -148,7 +151,7 @@ The same `type`, `major` and `minor` SHOULD NOT be used for multiple devices. ] ``` -###### Default Devices +### Default Devices In addition to any devices configured with this setting, the runtime MUST also supply: @@ -158,11 +161,11 @@ In addition to any devices configured with this setting, the runtime MUST also s * [`/dev/random`][random.4] * [`/dev/urandom`][random.4] * [`/dev/tty`][tty.4] -* [`/dev/console`][console.4] is setup if terminal is enabled in the config by bind mounting the pseudoterminal slave to /dev/console. +* [`/dev/console`][console.4] is set up if terminal is enabled in the config by bind mounting the pseudoterminal slave to /dev/console. * [`/dev/ptmx`][pts.4]. A [bind-mount or symlink of the container's `/dev/pts/ptmx`][devpts]. -## Control groups +## Control groups Also known as cgroups, they are used to restrict resource usage for a container and handle device access. 
cgroups provide controls (through controllers) to restrict cpu, memory, IO, pids and network for the container. @@ -186,28 +189,27 @@ You can configure a container's cgroups via the `resources` field of the Linux c Do not specify `resources` unless limits have to be updated. For example, to run a new process in an existing container without updating limits, `resources` need not be specified. -A runtime MUST at least use the minimum set of cgroup controllers required to fulfill the `resources` settings. -However, a runtime MAY attach the container process to additional cgroup controllers supported by the system. +Runtimes MAY attach the container process to additional cgroup controllers beyond those necessary to fulfill the `resources` settings. -###### Example +### Example ```json - "cgroupsPath": "/myRuntime/myContainer", - "resources": { - "memory": { - "limit": 100000, - "reservation": 200000 - }, - "devices": [ - { - "allow": false, - "access": "rwm" - } - ] + "cgroupsPath": "/myRuntime/myContainer", + "resources": { + "memory": { + "limit": 100000, + "reservation": 200000 + }, + "devices": [ + { + "allow": false, + "access": "rwm" + } + ] } ``` -#### Device whitelist +### Device whitelist **`devices`** (array of objects, OPTIONAL) configures the [device whitelist][cgroup-v1-devices]. The runtime MUST apply entries in the listed order. @@ -216,16 +218,16 @@ Each entry has the following structure: * **`allow`** *(boolean, REQUIRED)* - whether the entry is allowed or denied. * **`type`** *(string, OPTIONAL)* - type of device: `a` (all), `c` (char), or `b` (block). - `null` or unset values mean "all", mapping to `a`. + Unset values mean "all", mapping to `a`. * **`major, minor`** *(int64, OPTIONAL)* - [major, minor numbers][devices] for the device. - `null` or unset values mean "all", mapping to [`*` in the filesystem API][cgroup-v1-devices]. + Unset values mean "all", mapping to [`*` in the filesystem API][cgroup-v1-devices]. 
* **`access`** *(string, OPTIONAL)* - cgroup permissions for device. - A composition of `r` (read), `w` (write), and `m` (mknod). + A composition of `r` (read), `w` (write), and `m` (mknod). -###### Example +#### Example ```json - "devices": [ + "devices": [ { "allow": false, "access": "rwm" @@ -247,57 +249,37 @@ Each entry has the following structure: ] ``` -#### Disable out-of-memory killer +### Disable out-of-memory killer `disableOOMKiller` contains a boolean (`true` or `false`) that enables or disables the Out of Memory killer for a cgroup. If enabled (`false`), tasks that attempt to consume more memory than they are allowed are immediately killed by the OOM killer. The OOM killer is enabled by default in every cgroup using the `memory` subsystem. To disable it, specify a value of `true`. -For more information, see [the memory cgroup man page][cgroup-v1-memory]. +For more information, see the kernel cgroups documentation about [memory][cgroup-v1-memory]. * **`disableOOMKiller`** *(bool, OPTIONAL)* - enables or disables the OOM killer -###### Example +#### Example ```json "disableOOMKiller": false ``` -#### Set oom_score_adj - -`oomScoreAdj` sets heuristic regarding how the process is evaluated by the kernel during memory pressure. -For more information, see [the proc filesystem documentation section 3.1](https://www.kernel.org/doc/Documentation/filesystems/proc.txt). -This is a kernel/system level setting, where as `disableOOMKiller` is scoped for a memory cgroup. -For more information on how these two settings work together, see [the memory cgroup documentation section 10. OOM Contol][cgroup-v1-memory]. - -* **`oomScoreAdj`** *(int, OPTIONAL)* - adjust the oom-killer score - -###### Example - -```json - "oomScoreAdj": 100 -``` - -#### Memory +### Memory **`memory`** (object, OPTIONAL) represents the cgroup subsystem `memory` and it's used to set limits on the container's memory usage. -For more information, see [the memory cgroup man page][cgroup-v1-memory]. 
- -The following parameters can be specified to setup the controller: - -* **`limit`** *(int64, OPTIONAL)* - sets limit of memory usage in bytes - -* **`reservation`** *(int64, OPTIONAL)* - sets soft limit of memory usage in bytes +For more information, see the kernel cgroups documentation about [memory][cgroup-v1-memory]. -* **`swap`** *(int64, OPTIONAL)* - sets limit of memory+Swap usage - -* **`kernel`** *(int64, OPTIONAL)* - sets hard limit for kernel memory - -* **`kernelTCP`** *(int64, OPTIONAL)* - sets hard limit in bytes for kernel TCP buffer memory +The following parameters can be specified to set up the controller: +* **`limit`** *(uint64, OPTIONAL)* - sets limit of memory usage in bytes +* **`reservation`** *(uint64, OPTIONAL)* - sets soft limit of memory usage in bytes +* **`swap`** *(uint64, OPTIONAL)* - sets limit of memory+Swap usage +* **`kernel`** *(uint64, OPTIONAL)* - sets hard limit for kernel memory +* **`kernelTCP`** *(uint64, OPTIONAL)* - sets hard limit in bytes for kernel TCP buffer memory * **`swappiness`** *(uint64, OPTIONAL)* - sets swappiness parameter of vmscan (See sysctl's vm.swappiness) -###### Example +#### Example ```json "memory": { @@ -310,28 +292,22 @@ The following parameters can be specified to setup the controller: } ``` -#### CPU +### CPU **`cpu`** (object, OPTIONAL) represents the cgroup subsystems `cpu` and `cpusets`. -For more information, see [the cpusets cgroup man page][cgroup-v1-cpusets]. +For more information, see the kernel cgroups documentation about [cpusets][cgroup-v1-cpusets]. 
-The following parameters can be specified to setup the controller: +The following parameters can be specified to set up the controller: * **`shares`** *(uint64, OPTIONAL)* - specifies a relative share of CPU time available to the tasks in a cgroup - * **`quota`** *(int64, OPTIONAL)* - specifies the total amount of time in microseconds for which all tasks in a cgroup can run during one period (as defined by **`period`** below) - * **`period`** *(uint64, OPTIONAL)* - specifies a period of time in microseconds for how regularly a cgroup's access to CPU resources should be reallocated (CFS scheduler only) - * **`realtimeRuntime`** *(int64, OPTIONAL)* - specifies a period of time in microseconds for the longest continuous period in which the tasks in a cgroup have access to CPU resources - * **`realtimePeriod`** *(uint64, OPTIONAL)* - same as **`period`** but applies to realtime scheduler only - * **`cpus`** *(string, OPTIONAL)* - list of CPUs the container will run in - * **`mems`** *(string, OPTIONAL)* - list of Memory Nodes the container will run in -###### Example +#### Example ```json "cpu": { @@ -345,36 +321,34 @@ The following parameters can be specified to setup the controller: } ``` -#### Block IO Controller +### Block IO **`blockIO`** (object, OPTIONAL) represents the cgroup subsystem `blkio` which implements the block IO controller. -For more information, see [the kernel cgroups documentation about blkio][cgroup-v1-blkio]. - -The following parameters can be specified to setup the controller: +For more information, see the kernel cgroups documentation about [blkio][cgroup-v1-blkio]. -* **`blkioWeight`** *(uint16, OPTIONAL)* - specifies per-cgroup weight. This is default weight of the group on all devices until and unless overridden by per-device rules. The range is from 10 to 1000. 
+The following parameters can be specified to set up the controller: -* **`blkioLeafWeight`** *(uint16, OPTIONAL)* - equivalents of `blkioWeight` for the purpose of deciding how much weight tasks in the given cgroup has while competing with the cgroup's child cgroups. The range is from 10 to 1000. +* **`weight`** *(uint16, OPTIONAL)* - specifies per-cgroup weight. This is default weight of the group on all devices until and unless overridden by per-device rules. +* **`leafWeight`** *(uint16, OPTIONAL)* - equivalents of `weight` for the purpose of deciding how much weight tasks in the given cgroup has while competing with the cgroup's child cgroups. +* **`weightDevice`** *(array of objects, OPTIONAL)* - specifies the list of devices which will be bandwidth rate limited. The following parameters can be specified per-device: + * **`major, minor`** *(int64, REQUIRED)* - major, minor numbers for device. More info in [mknod(1)][mknod.1] man page. + * **`weight`** *(uint16, OPTIONAL)* - bandwidth rate for the device. + * **`leafWeight`** *(uint16, OPTIONAL)* - bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only -* **`blkioWeightDevice`** *(array, OPTIONAL)* - specifies the list of devices which will be bandwidth rate limited. The following parameters can be specified per-device: - * **`major, minor`** *(int64, REQUIRED)* - major, minor numbers for device. More info in `man mknod`. - * **`weight`** *(uint16, OPTIONAL)* - bandwidth rate for the device, range is from 10 to 1000 - * **`leafWeight`** *(uint16, OPTIONAL)* - bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only + You MUST specify at least one of `weight` or `leafWeight` in a given entry, and MAY specify both. - You must specify at least one of `weight` or `leafWeight` in a given entry, and can specify both. 
- -* **`blkioThrottleReadBpsDevice`**, **`blkioThrottleWriteBpsDevice`**, **`blkioThrottleReadIOPSDevice`**, **`blkioThrottleWriteIOPSDevice`** *(array, OPTIONAL)* - specify the list of devices which will be IO rate limited. - The following parameters can be specified per-device: - * **`major, minor`** *(int64, REQUIRED)* - major, minor numbers for device. More info in `man mknod`. +* **`throttleReadBpsDevice`**, **`throttleWriteBpsDevice`**, **`throttleReadIOPSDevice`**, **`throttleWriteIOPSDevice`** *(array of objects, OPTIONAL)* - specify the list of devices which will be IO rate limited. + The following parameters can be specified per-device: + * **`major, minor`** *(int64, REQUIRED)* - major, minor numbers for device. More info in [mknod(1)][mknod.1] man page. * **`rate`** *(uint64, REQUIRED)* - IO rate limit for the device -###### Example +#### Example ```json "blockIO": { - "blkioWeight": 10, - "blkioLeafWeight": 10, - "blkioWeightDevice": [ + "weight": 10, + "leafWeight": 10, + "weightDevice": [ { "major": 8, "minor": 0, @@ -387,14 +361,14 @@ The following parameters can be specified to setup the controller: "weight": 500 } ], - "blkioThrottleReadBpsDevice": [ + "throttleReadBpsDevice": [ { "major": 8, "minor": 0, "rate": 600 } ], - "blkioThrottleWriteIOPSDevice": [ + "throttleWriteIOPSDevice": [ { "major": 8, "minor": 16, @@ -404,22 +378,21 @@ The following parameters can be specified to setup the controller: } ``` -#### Huge page limits +### Huge page limits **`hugepageLimits`** (array of objects, OPTIONAL) represents the `hugetlb` controller which allows to limit the HugeTLB usage per control group and enforces the controller limit during page fault. -For more information, see the [kernel cgroups documentation about HugeTLB][cgroup-v1-hugetlb]. +For more information, see the kernel cgroups documentation about [HugeTLB][cgroup-v1-hugetlb]. 
Each entry has the following structure: * **`pageSize`** *(string, REQUIRED)* - hugepage size +* **`limit`** *(uint64, REQUIRED)* - limit in bytes of *hugepagesize* HugeTLB usage -* **`limit`** *(int64, REQUIRED)* - limit in bytes of *hugepagesize* HugeTLB usage - -###### Example +#### Example ```json - "hugepageLimits": [ + "hugepageLimits": [ { "pageSize": "2MB", "limit": 209715200 @@ -427,24 +400,23 @@ Each entry has the following structure: ] ``` -#### Network +### Network **`network`** (object, OPTIONAL) represents the cgroup subsystems `net_cls` and `net_prio`. -For more information, see [the net\_cls cgroup man page][cgroup-v1-net-cls] and [the net\_prio cgroup man page][cgroup-v1-net-prio]. +For more information, see the kernel cgroups documentation about [net\_cls cgroup][cgroup-v1-net-cls] and [net\_prio cgroup][cgroup-v1-net-prio]. -The following parameters can be specified to setup the controller: +The following parameters can be specified to set up the controller: * **`classID`** *(uint32, OPTIONAL)* - is the network class identifier the cgroup's network packets will be tagged with - -* **`priorities`** *(array, OPTIONAL)* - specifies a list of objects of the priorities assigned to traffic originating from processes in the group and egressing the system on various interfaces. - The following parameters can be specified per-priority: - * **`name`** *(string, REQUIRED)* - interface name +* **`priorities`** *(array of objects, OPTIONAL)* - specifies a list of objects of the priorities assigned to traffic originating from processes in the group and egressing the system on various interfaces. 
+ The following parameters can be specified per-priority: + * **`name`** *(string, REQUIRED)* - interface name in [runtime network namespace](glossary.md#runtime-namespace) * **`priority`** *(uint32, REQUIRED)* - priority applied to the interface -###### Example +#### Example ```json - "network": { + "network": { "classID": 1048577, "priorities": [ { @@ -459,114 +431,175 @@ The following parameters can be specified to setup the controller: } ``` -#### PIDs +### PIDs **`pids`** (object, OPTIONAL) represents the cgroup subsystem `pids`. -For more information, see [the pids cgroup man page][cgroup-v1-pids]. +For more information, see the kernel cgroups documentation about [pids][cgroup-v1-pids]. -The following parameters can be specified to setup the controller: +The following parameters can be specified to set up the controller: * **`limit`** *(int64, REQUIRED)* - specifies the maximum number of tasks in the cgroup -###### Example +#### Example ```json - "pids": { + "pids": { "limit": 32771 } ``` -## Sysctl +## IntelRdt + +**`intelRdt`** (object, OPTIONAL) represents the [Intel Resource Director Technology][intel-rdt-cat-kernel-interface]. + If `intelRdt` is set, the runtime MUST write the container process ID to the `<container-id>/tasks` file in a mounted `resctrl` pseudo-filesystem, using the container ID from [`start`](runtime.md#start) and creating the `<container-id>` directory if necessary. + If no mounted `resctrl` pseudo-filesystem is available in the [runtime mount namespace](glossary.md#runtime-namespace), the runtime MUST [generate an error](runtime.md#errors). + + If `intelRdt` is not set, the runtime MUST NOT manipulate any `resctrl` pseudo-filesystems. + +The following parameters can be specified for the container: + +* **`l3CacheSchema`** *(string, OPTIONAL)* - specifies the schema for L3 cache id and capacity bitmask (CBM). + If `l3CacheSchema` is set, runtimes MUST write the value to the `schemata` file in the `<container-id>` directory discussed in `intelRdt`. 
+ + If `l3CacheSchema` is not set, runtimes MUST NOT write to `schemata` files in any `resctrl` pseudo-filesystems. + +### Example + +Consider a two-socket machine with two L3 caches where the default CBM is 0xfffff and the max CBM length is 20 bits. +Tasks inside the container only have access to the "upper" 80% of L3 cache id 0 and the "lower" 50% of L3 cache id 1: + +```json +"linux": { + "intelRdt": { + "l3CacheSchema": "L3:0=ffff0;1=3ff" + } +} +``` + +## Sysctl **`sysctl`** (object, OPTIONAL) allows kernel parameters to be modified at runtime for the container. -For more information, see [the man page](http://man7.org/linux/man-pages/man8/sysctl.8.html) +For more information, see the [sysctl(8)][sysctl.8] man page. -###### Example +### Example ```json - "sysctl": { + "sysctl": { "net.ipv4.ip_forward": "1", "net.core.somaxconn": "256" } ``` -## seccomp +## Seccomp Seccomp provides application sandboxing mechanism in the Linux kernel. Seccomp configuration allows one to configure actions to take for matched syscalls and furthermore also allows matching on values passed as arguments to syscalls. -For more information about Seccomp, see [Seccomp kernel documentation](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt) -The actions, architectures, and operators are strings that match the definitions in seccomp.h from [libseccomp](https://github.com/seccomp/libseccomp) and are translated to corresponding values. -A valid list of constants as of libseccomp v2.3.0 is shown below. 
- -Architecture Constants -* `SCMP_ARCH_X86` -* `SCMP_ARCH_X86_64` -* `SCMP_ARCH_X32` -* `SCMP_ARCH_ARM` -* `SCMP_ARCH_AARCH64` -* `SCMP_ARCH_MIPS` -* `SCMP_ARCH_MIPS64` -* `SCMP_ARCH_MIPS64N32` -* `SCMP_ARCH_MIPSEL` -* `SCMP_ARCH_MIPSEL64` -* `SCMP_ARCH_MIPSEL64N32` -* `SCMP_ARCH_PPC` -* `SCMP_ARCH_PPC64` -* `SCMP_ARCH_PPC64LE` -* `SCMP_ARCH_S390` -* `SCMP_ARCH_S390X` - -Action Constants: -* `SCMP_ACT_KILL` -* `SCMP_ACT_TRAP` -* `SCMP_ACT_ERRNO` -* `SCMP_ACT_TRACE` -* `SCMP_ACT_ALLOW` - -Operator Constants: -* `SCMP_CMP_NE` -* `SCMP_CMP_LT` -* `SCMP_CMP_LE` -* `SCMP_CMP_EQ` -* `SCMP_CMP_GE` -* `SCMP_CMP_GT` -* `SCMP_CMP_MASKED_EQ` - -###### Example +For more information about Seccomp, see [Seccomp][seccomp] kernel documentation. +The actions, architectures, and operators are strings that match the definitions in seccomp.h from [libseccomp][] and are translated to corresponding values. + +**`seccomp`** (object, OPTIONAL) + +The following parameters can be specified to set up seccomp: + +* **`defaultAction`** *(string, REQUIRED)* - the default action for seccomp. Allowed values are the same as `syscalls[].action`. + +* **`architectures`** *(array of strings, OPTIONAL)* - the architecture used for system calls. + A valid list of constants as of libseccomp v2.3.2 is shown below. + + * `SCMP_ARCH_X86` + * `SCMP_ARCH_X86_64` + * `SCMP_ARCH_X32` + * `SCMP_ARCH_ARM` + * `SCMP_ARCH_AARCH64` + * `SCMP_ARCH_MIPS` + * `SCMP_ARCH_MIPS64` + * `SCMP_ARCH_MIPS64N32` + * `SCMP_ARCH_MIPSEL` + * `SCMP_ARCH_MIPSEL64` + * `SCMP_ARCH_MIPSEL64N32` + * `SCMP_ARCH_PPC` + * `SCMP_ARCH_PPC64` + * `SCMP_ARCH_PPC64LE` + * `SCMP_ARCH_S390` + * `SCMP_ARCH_S390X` + * `SCMP_ARCH_PARISC` + * `SCMP_ARCH_PARISC64` + +* **`syscalls`** *(array of objects, OPTIONAL)* - match a syscall in seccomp. + + While this property is OPTIONAL, some values of `defaultAction` are not useful without `syscalls` entries. 
+ For example, if `defaultAction` is `SCMP_ACT_KILL` and `syscalls` is empty or unset, the kernel will kill the container process on its first syscall. + + Each entry has the following structure: + + * **`names`** *(array of strings, REQUIRED)* - the names of the syscalls. + `names` MUST contain at least one entry. + * **`action`** *(string, REQUIRED)* - the action for seccomp rules. + A valid list of constants as of libseccomp v2.3.2 is shown below. + + * `SCMP_ACT_KILL` + * `SCMP_ACT_TRAP` + * `SCMP_ACT_ERRNO` + * `SCMP_ACT_TRACE` + * `SCMP_ACT_ALLOW` + + * **`args`** *(array of objects, OPTIONAL)* - the specific syscall in seccomp. + + Each entry has the following structure: + + * **`index`** *(uint, REQUIRED)* - the index for syscall arguments in seccomp. + * **`value`** *(uint64, REQUIRED)* - the value for syscall arguments in seccomp. + * **`valueTwo`** *(uint64, REQUIRED)* - the value for syscall arguments in seccomp. + * **`op`** *(string, REQUIRED)* - the operator for syscall arguments in seccomp. + A valid list of constants as of libseccomp v2.3.2 is shown below. + + * `SCMP_CMP_NE` + * `SCMP_CMP_LT` + * `SCMP_CMP_LE` + * `SCMP_CMP_EQ` + * `SCMP_CMP_GE` + * `SCMP_CMP_GT` + * `SCMP_CMP_MASKED_EQ` + +### Example ```json - "seccomp": { - "defaultAction": "SCMP_ACT_ALLOW", - "architectures": [ - "SCMP_ARCH_X86" - ], - "syscalls": [ - { - "name": "getcwd", - "action": "SCMP_ACT_ERRNO" - } - ] - } + "seccomp": { + "defaultAction": "SCMP_ACT_ALLOW", + "architectures": [ + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ], + "syscalls": [ + { + "names": [ + "getcwd", + "chmod" + ], + "action": "SCMP_ACT_ERRNO" + } + ] + } ``` -## Rootfs Mount Propagation +## Rootfs Mount Propagation **`rootfsPropagation`** (string, OPTIONAL) sets the rootfs's mount propagation. -Its value is either slave, private, or shared. -[The kernel doc](https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt) has more information about mount propagation. 
+ Its value is either slave, private, shared or unbindable. + The [Shared Subtrees][sharedsubtree] article in the kernel documentation has more information about mount propagation. -###### Example +### Example ```json "rootfsPropagation": "slave", ``` -## Masked Paths +## Masked Paths **`maskedPaths`** (array of strings, OPTIONAL) will mask over the provided paths inside the container so that they cannot be read. -The values MUST be absolute paths in the [container namespace][container-namespace2]. + The values MUST be absolute paths in the [container namespace](glossary.md#container-namespace). -###### Example +### Example ```json "maskedPaths": [ @@ -574,12 +607,12 @@ The values MUST be absolute paths in the [container namespace][container-namespa ] ``` -## Readonly Paths +## Readonly Paths **`readonlyPaths`** (array of strings, OPTIONAL) will set the provided paths as readonly inside the container. -The values MUST be absolute paths in the [container namespace][container-namespace2]. + The values MUST be absolute paths in the [container namespace](glossary.md#container-namespace). -###### Example +### Example ```json "readonlyPaths": [ @@ -587,17 +620,17 @@ The values MUST be absolute paths in the [container namespace][container-namespa ] ``` -## Mount Label +## Mount Label **`mountLabel`** (string, OPTIONAL) will set the Selinux context for the mounts in the container. 
-###### Example +### Example ```json "mountLabel": "system_u:object_r:svirt_sandbox_file_t:s0:c715,c811" ``` -[container-namespace2]: glossary.md#container_namespace + [cgroup-v1]: https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt [cgroup-v1-blkio]: https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt [cgroup-v1-cpusets]: https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt @@ -608,16 +641,26 @@ The values MUST be absolute paths in the [container namespace][container-namespa [cgroup-v1-net-prio]: https://www.kernel.org/doc/Documentation/cgroup-v1/net_prio.txt [cgroup-v1-pids]: https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt [cgroup-v2]: https://www.kernel.org/doc/Documentation/cgroup-v2.txt -[devices]: https://www.kernel.org/doc/Documentation/devices.txt +[devices]: https://www.kernel.org/doc/Documentation/admin-guide/devices.txt [devpts]: https://www.kernel.org/doc/Documentation/filesystems/devpts.txt -[file.1]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_164 +[file]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_164 +[libseccomp]: https://github.com/seccomp/libseccomp +[procfs]: https://www.kernel.org/doc/Documentation/filesystems/proc.txt +[seccomp]: https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt +[sharedsubtree]: https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt +[sysfs]: https://www.kernel.org/doc/Documentation/filesystems/sysfs.txt +[tmpfs]: https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt -[mknod.1]: http://man7.org/linux/man-pages/man1/mknod.1.html -[mknod.2]: http://man7.org/linux/man-pages/man2/mknod.2.html [console.4]: http://man7.org/linux/man-pages/man4/console.4.html [full.4]: http://man7.org/linux/man-pages/man4/full.4.html +[mknod.1]: http://man7.org/linux/man-pages/man1/mknod.1.html +[mknod.2]: http://man7.org/linux/man-pages/man2/mknod.2.html +[namespaces.7_2]: 
http://man7.org/linux/man-pages/man7/namespaces.7.html [null.4]: http://man7.org/linux/man-pages/man4/null.4.html [pts.4]: http://man7.org/linux/man-pages/man4/pts.4.html [random.4]: http://man7.org/linux/man-pages/man4/random.4.html +[sysctl.8]: http://man7.org/linux/man-pages/man8/sysctl.8.html [tty.4]: http://man7.org/linux/man-pages/man4/tty.4.html [zero.4]: http://man7.org/linux/man-pages/man4/zero.4.html +[user-namespaces]: http://man7.org/linux/man-pages/man7/user_namespaces.7.html +[intel-rdt-cat-kernel-interface]: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt diff --git a/config-solaris.md b/config-solaris.md index 8343f23f9..4446a166a 100644 --- a/config-solaris.md +++ b/config-solaris.md @@ -1,6 +1,6 @@ # Solaris Application Container Configuration -Solaris application containers can be configured using the following properties, all of the below properties have mappings to properties specified under zonecfg(8) man page, except milestone. +Solaris application containers can be configured using the following properties, all of the below properties have mappings to properties specified under [zonecfg(1M)][zonecfg.1m_2] man page, except milestone. ## milestone The SMF(Service Management Facility) FMRI which should go to "online" state before we start the desired process within the container. @@ -14,7 +14,7 @@ The SMF(Service Management Facility) FMRI which should go to "online" state befo ## limitpriv The maximum set of privileges any process in this container can obtain. -The property should consist of a comma-separated privilege set specification as described in priv_str_to_set(3C) man page for the respective release of Solaris. +The property should consist of a comma-separated privilege set specification as described in [priv_str_to_set(3C)][priv-str-to-set.3c] man page for the respective release of Solaris. 
**`limitpriv`** *(string, OPTIONAL)* @@ -26,7 +26,7 @@ The property should consist of a comma-separated privilege set specification as ## maxShmMemory The maximum amount of shared memory allowed for this application container. A scale (K, M, G, T) can be applied to the value for each of these numbers (for example, 1M is one megabyte). -Mapped to max-shm-memory in zonecfg(8) man page. +Mapped to `max-shm-memory` in [zonecfg(1M)][zonecfg.1m_2] man page. **`maxShmMemory`** *(string, OPTIONAL)* @@ -40,7 +40,7 @@ Sets a limit on the amount of CPU time that can be used by a container. The unit used translates to the percentage of a single CPU that can be used by all user threads in a container, expressed as a fraction (for example, .75) or a mixed number (whole number and fraction, for example, 1.25). An ncpu value of 1 means 100% of a CPU, a value of 1.25 means 125%, .75 mean 75%, and so forth. When projects within a capped container have their own caps, the minimum value takes precedence. -cappedCPU is mapped to capped-cpu in zonecfg(8) man page. +cappedCPU is mapped to `capped-cpu` in [zonecfg(1M)][zonecfg.1m_2] man page. * **`ncpus`** *(string, OPTIONAL)* @@ -54,7 +54,7 @@ cappedCPU is mapped to capped-cpu in zonecfg(8) man page. ## cappedMemory The physical and swap caps on the memory that can be used by this application container. A scale (K, M, G, T) can be applied to the value for each of these numbers (for example, 1M is one megabyte). -cappedMemory is mapped to capped-memory in zonecfg(8) man page. +cappedMemory is mapped to `capped-memory` in [zonecfg(1M)][zonecfg.1m_2] man page. * **`physical`** *(string, OPTIONAL)* * **`swap`** *(string, OPTIONAL)* @@ -70,34 +70,34 @@ cappedMemory is mapped to capped-memory in zonecfg(8) man page. ## Network ### Automatic Network (anet) -anet is specified as an array that is used to setup networking for Solaris application containers. 
+anet is specified as an array that is used to set up networking for Solaris application containers. The anet resource represents the automatic creation of a network resource for an application container. The zones administration daemon, zoneadmd, is the primary process for managing the container's virtual platform. -One of the daemons is responsibilities is creation and teardown of the networks for the container. -For more information on the daemon check the zoneadmd(1M) man page. +One of the daemon's responsibilities is creation and teardown of the networks for the container. +For more information on the daemon see the [zoneadmd(1M)][zoneadmd.1m] man page. When such a container is started, a temporary VNIC(Virtual NIC) is automatically created for the container. The VNIC is deleted when the container is torn down. -The following properties can be used to setup automatic networks. -For additional information on properties check zonecfg(8) man page for the respective release of Solaris. +The following properties can be used to set up automatic networks. +For additional information on properties, check the [zonecfg(1M)][zonecfg.1m_2] man page for the respective release of Solaris. * **`linkname`** *(string, OPTIONAL)* Specify a name for the automatically created VNIC datalink. * **`lowerLink`** *(string, OPTIONAL)* Specify the link over which the VNIC will be created. -Mapped to lower-link in the zonecfg(8) man page. -* **`allowedAddress`** *(string, OPTIONAL)* The set of IP addresses that the container can use might be constrained by specifying the allowedAddress property. -If allowedAddress has not been specified, then they can use any IP address on the associated physical interface for the network resource. -Otherwise, when allowedAddress is specified, the container cannot use IP addresses that are not in the allowedAddress list for the physical address. -Mapped to allowed-address in the zonecfg(8) man page. 
-* **`configureAllowedAddress`** *(string, OPTIONAL)* If configureAllowedAddress is set to true, the addresses specified by allowedAddress are automatically configured on the interface each time the container starts. -When it is set to false, the allowedAddress will not be configured on container start. -Mapped to configure-allowed-address in the zonecfg(8) man page. +Mapped to `lower-link` in the [zonecfg(1M)][zonecfg.1m_2] man page. +* **`allowedAddress`** *(string, OPTIONAL)* The set of IP addresses that the container can use might be constrained by specifying the `allowedAddress` property. + If `allowedAddress` has not been specified, then they can use any IP address on the associated physical interface for the network resource. + Otherwise, when `allowedAddress` is specified, the container cannot use IP addresses that are not in the `allowedAddress` list for the physical address. + Mapped to `allowed-address` in the [zonecfg(1M)][zonecfg.1m_2] man page. +* **`configureAllowedAddress`** *(string, OPTIONAL)* If `configureAllowedAddress` is set to true, the addresses specified by `allowedAddress` are automatically configured on the interface each time the container starts. + When it is set to false, the `allowedAddress` will not be configured on container start. + Mapped to `configure-allowed-address` in the [zonecfg(1M)][zonecfg.1m_2] man page. * **`defrouter`** *(string, OPTIONAL)* The value for the OPTIONAL default router. * **`macAddress`** *(string, OPTIONAL)* Set the VNIC's MAC addresses based on the specified value or keyword. -If not a keyword, it is interpreted as a unicast MAC address. -For a list of the supported keywords please refer to the zonecfg(8) man page of the respective Solaris release. -Mapped to mac-address in the zonecfg(8) man page. + If not a keyword, it is interpreted as a unicast MAC address. + For a list of the supported keywords please refer to the [zonecfg(1M)][zonecfg.1m_2] man page of the respective Solaris release. 
+ Mapped to `mac-address` in the [zonecfg(1M)][zonecfg.1m_2] man page. * **`linkProtection`** *(string, OPTIONAL)* Enables one or more types of link protection using comma-separated values. -See the protection property in dladm(8) for supported values in respective release of Solaris. -Mapped to link-protection in the zonecfg(8) man page. + See the protection property in dladm(8) for supported values in respective release of Solaris. + Mapped to `link-protection` in the [zonecfg(1M)][zonecfg.1m_2] man page. #### Example ```json @@ -113,3 +113,8 @@ Mapped to link-protection in the zonecfg(8) man page. } ] ``` + + +[priv-str-to-set.3c]: http://docs.oracle.com/cd/E86824_01/html/E54766/priv-str-to-set-3c.html +[zoneadmd.1m]: http://docs.oracle.com/cd/E86824_01/html/E54764/zoneadmd-1m.html +[zonecfg.1m_2]: http://docs.oracle.com/cd/E86824_01/html/E54764/zonecfg-1m.html diff --git a/config-windows.md b/config-windows.md index 883998fb8..40b8c41e5 100644 --- a/config-windows.md +++ b/config-windows.md @@ -3,6 +3,22 @@ This document describes the schema for the [Windows-specific section](config.md#platform-specific-configuration) of the [container configuration](config.md). The Windows container specification uses APIs provided by the Windows Host Compute Service (HCS) to fulfill the spec. +## LayerFolders + +**`layerFolders`** (array of strings, REQUIRED) specifies a list of layer folders the container image relies on. The list is ordered from topmost layer to base layer. + `layerFolders` MUST contain at least one entry. + +### Example + +```json + "windows": { + "layerFolders": [ + "C:\\Layers\\layer1", + "C:\\Layers\\layer2" + ] + } +``` + ## Resources You can configure a container's resource limits via the OPTIONAL `resources` field of the Windows configuration. @@ -15,16 +31,13 @@ The following parameters can be specified: * **`limit`** *(uint64, OPTIONAL)* - sets limit of memory usage in bytes. 
-* **`reservation`** *(uint64, OPTIONAL)* - sets the guaranteed minimum amount of memory for a container in bytes. - #### Example ```json "windows": { "resources": { "memory": { - "limit": 2097152, - "reservation": 524288 + "limit": 2097152 } } } @@ -37,10 +50,8 @@ The following parameters can be specified: The following parameters can be specified: * **`count`** *(uint64, OPTIONAL)* - specifies the number of CPUs available to the container. - -* **`shares`** *(uint16, OPTIONAL)* - specifies the relative weight to other containers with CPU shares. The range is from 1 to 10000. - -* **`percent`** *(uint, OPTIONAL)* - specifies the percentage of available CPUs usable by the container. +* **`shares`** *(uint16, OPTIONAL)* - specifies the relative weight to other containers with CPU shares. +* **`maximum`** *(uint, OPTIONAL)* - specifies the portion of processor cycles that this container can use as a percentage times 100. #### Example @@ -48,7 +59,7 @@ The following parameters can be specified: "windows": { "resources": { "cpu": { - "percent": 50 + "maximum": 5000 } } } @@ -61,9 +72,7 @@ The following parameters can be specified: The following parameters can be specified: * **`iops`** *(uint64, OPTIONAL)* - specifies the maximum IO operations per second for the system drive of the container. - * **`bps`** *(uint64, OPTIONAL)* - specifies the maximum bytes per second for the system drive of the container. - * **`sandboxSize`** *(uint64, OPTIONAL)* - specifies the minimum size of the system drive in bytes. #### Example @@ -78,22 +87,89 @@ The following parameters can be specified: } ``` -### Network +## Network -`network` is an OPTIONAL configuration for the container's network usage. +You can configure a container's networking options via the OPTIONAL `network` field of the Windows configuration. The following parameters can be specified: -* **`egressBandwidth`** *(uint64, OPTIONAL)* - specified the maximum egress bandwidth in bytes per second for the container. 
+* **`endpointList`** *(array of strings, OPTIONAL)* - list of HNS (Host Network Service) endpoints that the container should connect to. +* **`allowUnqualifiedDNSQuery`** *(bool, OPTIONAL)* - specifies if unqualified DNS name resolution is allowed. +* **`DNSSearchList`** *(array of strings, OPTIONAL)* - comma separated list of DNS suffixes to use for name resolution. +* **`networkSharedContainerName`** *(string, OPTIONAL)* - name (ID) of the container that we will share with the network stack. -#### Example +### Example ```json "windows": { - "resources": { - "network": { - "egressBandwidth": 1048577 - } + "network": { + "endpointList": [ + "7a010682-17e0-4455-a838-02e5d9655fe6" + ], + "allowUnqualifiedDNSQuery": true, + "DNSSearchList": [ + "a.com", + "b.com" + ], + "networkSharedContainerName": "containerName" } } ``` + +## Credential Spec + +You can configure a container's group Managed Service Account (gMSA) via the OPTIONAL `credentialSpec` field of the Windows configuration. +The `credentialSpec` is a JSON object whose properties are implementation-defined. +For more information about gMSAs, see [Active Directory Service Accounts for Windows Containers][gMSAOverview]. +For more information about tooling to generate a gMSA, see [Deployment Overview][gMSATooling]. + + +[gMSAOverview]: https://aka.ms/windowscontainers/manage-serviceaccounts +[gMSATooling]: https://aka.ms/windowscontainers/credentialspec-tools + +## Servicing + +When a container terminates, the Host Compute Service indicates if a Windows update servicing operation is pending. +You can indicate that a container should be started in a mode to apply pending servicing operations via the OPTIONAL `servicing` field of the Windows configuration. 
+ +### Example + +```json + "windows": { + "servicing": true + } +``` + +## IgnoreFlushesDuringBoot + +You can indicate that a container should be started in a mode where disk flushes are not performed during container boot via the OPTIONAL `ignoreFlushesDuringBoot` field of the Windows configuration. + +### Example + +```json + "windows": { + "ignoreFlushesDuringBoot": true + } +``` + +## HyperV + +`hyperv` is an OPTIONAL field of the Windows configuration. +If present, the container MUST be run with Hyper-V isolation. +If omitted, the container MUST be run as a Windows Server container. + +The following parameters can be specified: + +* **`utilityVMPath`** *(string, OPTIONAL)* - specifies the path to the image used for the utility VM. + This would be specified if using a base image which does not contain a utility VM image. + If not supplied, the runtime will search the container filesystem layers from the bottom-most layer upwards, until it locates "UtilityVM", and default to that path. + +### Example + +```json + "windows": { + "hyperv": { + "utilityVMPath": "C:\\path\\to\\utilityvm" + } + } +``` diff --git a/config.md b/config.md index 6f1d1dea7..2d99c4b25 100644 --- a/config.md +++ b/config.md @@ -1,22 +1,22 @@ -# Container Configuration file +# Container Configuration file -The container's top-level directory MUST contain a configuration file called `config.json`. -The canonical schema is defined in this document, but there is a JSON Schema in [`schema/config-schema.json`](schema/config-schema.json) and Go bindings in [`specs-go/config.go`](specs-go/config.go). -Platform-specific configuration schema are defined in the [platform-specific documents](#platform-specific-configuration) linked below. - -The configuration file contains metadata necessary to implement standard operations against the container. +This configuration file contains metadata necessary to implement [standard operations](runtime.md#operations) against the container. 
This includes the process to run, environment variables to inject, sandboxing features to use, etc. +The canonical schema is defined in this document, but there is a JSON Schema in [`schema/config-schema.json`](schema/config-schema.json) and Go bindings in [`specs-go/config.go`](specs-go/config.go). +[Platform](spec.md#platforms)-specific configuration schema are defined in the [platform-specific documents](#platform-specific-configuration) linked below. +For properties that are only defined for some [platforms](spec.md#platforms), the Go property has a `platform` tag listing those platforms (e.g. `platform:"linux,solaris"`). + Below is a detailed description of each field defined in the configuration format and valid values are specified. Platform-specific fields are identified as such. For all platform-specific configuration values, the scope defined below in the [Platform-specific configuration](#platform-specific-configuration) section applies. -## Specification version +## Specification version -* **`ociVersion`** (string, REQUIRED) MUST be in [SemVer v2.0.0](http://semver.org/spec/v2.0.0.html) format and specifies the version of the Open Container Runtime Specification with which the bundle complies. -The Open Container Runtime Specification follows semantic versioning and retains forward and backward compatibility within major versions. -For example, if a configuration is compliant with version 1.1 of this specification, it is compatible with all runtimes that support any 1.1 or later release of this specification, but is not compatible with a runtime that supports 1.0 and not 1.1. +* **`ociVersion`** (string, REQUIRED) MUST be in [SemVer v2.0.0][semver-v2.0.0] format and specifies the version of the Open Container Runtime Specification with which the bundle complies. + The Open Container Runtime Specification follows semantic versioning and retains forward and backward compatibility within major versions. 
+ For example, if a configuration is compliant with version 1.1 of this specification, it is compatible with all runtimes that support any 1.1 or later release of this specification, but is not compatible with a runtime that supports 1.0 and not 1.1. ### Example @@ -24,17 +24,24 @@ For example, if a configuration is compliant with version 1.1 of this specificat "ociVersion": "0.1.0" ``` -## Root Configuration +## Root **`root`** (object, REQUIRED) specifies the container's root filesystem. -* **`path`** (string, REQUIRED) Specifies the path to the root filesystem for the container. - The path is either an absolute path or a relative path to the bundle. - On Linux, for example, with a bundle at `/to/bundle` and a root filesystem at `/to/bundle/rootfs`, the `path` value can be either `/to/bundle/rootfs` or `rootfs`. - A directory MUST exist at the path declared by the field. +* **`path`** (string, OPTIONAL) Specifies the path to the root filesystem for the container. + The path is either an absolute path or a relative path to the bundle. + + * On Windows, for Windows Server Containers, this field is REQUIRED and MUST be specified as a [volume GUID path][naming-a-volume]. + For Hyper-V Containers, this field MUST be omitted. + * On all other platforms, this field is REQUIRED. + The value SHOULD be the conventional `rootfs`. + * On Linux, for example, with a bundle at `/to/bundle` and a root filesystem at `/to/bundle/rootfs`, the `path` value can be either `/to/bundle/rootfs` or `rootfs`. + + If defined, a directory MUST exist at the path declared by the field. * **`readonly`** (bool, OPTIONAL) If true then the root filesystem MUST be read-only inside the container, defaults to false. + * On Windows, this field MUST be omitted or false. 
-### Example +### Example (POSIX) ```json "root": { @@ -43,29 +50,53 @@ For example, if a configuration is compliant with version 1.1 of this specificat } ``` -## Mounts +### Example (Windows) + +```json +"root": { + "path": "\\\\?\\Volume{ec84d99e-3f02-11e7-ac6c-00155d7682cf}\\" +} +``` -**`mounts`** (array, OPTIONAL) specifies additional mounts beyond [`root`](#root-configuration). -The runtime MUST mount entries in the listed order. -For Linux, the parameters are as documented in [the mount system call](http://man7.org/linux/man-pages/man2/mount.2.html). -For Solaris, the mount entry corresponds to the 'fs' resource in zonecfg(8). -For Windows, see links for details about [mountvol](http://ss64.com/nt/mountvol.html) and [SetVolumeMountPoint](https://msdn.microsoft.com/en-us/library/windows/desktop/aa365561(v=vs.85).aspx). +## Mounts +**`mounts`** (array of objects, OPTIONAL) specifies additional mounts beyond [`root`](#root). + The runtime MUST mount entries in the listed order. + For Linux, the parameters are as documented in [mount(2)][mount.2] system call man page. + For Solaris, the mount entry corresponds to the 'fs' resource in the [zonecfg(1M)][zonecfg.1m] man page. * **`destination`** (string, REQUIRED) Destination of mount point: path inside container. - This value MUST be an absolute path. - * Windows: one mount destination MUST NOT be nested within another mount (e.g., c:\\foo and c:\\foo\\bar). - * Solaris: corresponds to "dir" of the fs resource in zonecfg(8). -* **`type`** (string, REQUIRED) The filesystem type of the filesystem to be mounted. - * Linux: valid *filesystemtype* supported by the kernel as listed in */proc/filesystems* (e.g., "minix", "ext2", "ext3", "jfs", "xfs", "reiserfs", "msdos", "proc", "nfs", "iso9660"). - * Windows: the type of file system on the volume, e.g. "ntfs". - * Solaris: corresponds to "type" of the fs resource in zonecfg(8). 
-* **`source`** (string, REQUIRED) A device name, but can also be a directory name or a dummy. - * Windows: the volume name that is the target of the mount point, \\?\Volume\{GUID}\ (on Windows source is called target). - * Solaris: corresponds to "special" of the fs resource in zonecfg(8). -* **`options`** (list of strings, OPTIONAL) Mount options of the filesystem to be used. - * Linux: [supported][mount.8-filesystem-independent] [options][mount.8-filesystem-specific] are listed in [mount(8)][mount.8]. - * Solaris: corresponds to "options" of the fs resource in zonecfg(8). + This value MUST be an absolute path. + * Windows: one mount destination MUST NOT be nested within another mount (e.g., c:\\foo and c:\\foo\\bar). + * Solaris: corresponds to "dir" of the fs resource in [zonecfg(1M)][zonecfg.1m]. +* **`source`** (string, OPTIONAL) A device name, but can also be a directory name or a dummy. + Path values are either absolute or relative to the bundle. + * Windows: a local directory on the filesystem of the container host. UNC paths and mapped drives are not supported. + * Solaris: corresponds to "special" of the fs resource in [zonecfg(1M)][zonecfg.1m]. +* **`options`** (array of strings, OPTIONAL) Mount options of the filesystem to be used. + * Linux: supported options are listed in the [mount(8)][mount.8] man page. + Note both [filesystem-independent][mount.8-filesystem-independent] and [filesystem-specific][mount.8-filesystem-specific] options are listed. + * Solaris: corresponds to "options" of the fs resource in [zonecfg(1M)][zonecfg.1m]. + +### Example (Windows) + +```json +"mounts": [ + { + "destination": "C:\\folder-inside-container", + "source": "C:\\folder-on-host", + "options": [] + } +] +``` + +### Linux and Solaris Mounts + +For Linux and Solaris based systems the mounts structure has the following fields: + +* **`type`** (string, OPTIONAL) The type of the filesystem to be mounted. 
+ * Linux: filesystem types supported by the kernel as listed in */proc/filesystems* (e.g., "minix", "ext2", "ext3", "jfs", "xfs", "reiserfs", "msdos", "proc", "nfs", "iso9660"). + * Solaris: corresponds to "type" of the fs resource in [zonecfg(1M)][zonecfg.1m]. ### Example (Linux) @@ -86,19 +117,6 @@ For Windows, see links for details about [mountvol](http://ss64.com/nt/mountvol. ] ``` -### Example (Windows) - -```json -"mounts": [ - "myfancymountpoint": { - "destination": "C:\\Users\\crosbymichael\\My Fancy Mount Point\\", - "type": "ntfs", - "source": "\\\\?\\Volume\\{2eca078d-5cbc-43d3-aff8-7e8511f60d0e}\\", - "options": [] - } -] -``` - ### Example (Solaris) ```json @@ -117,51 +135,69 @@ For Windows, see links for details about [mountvol](http://ss64.com/nt/mountvol. ] ``` -## Process +## Process -**`process`** (object, REQUIRED) specifies the container process. +**`process`** (object, OPTIONAL) specifies the container process. + This property is REQUIRED when [`start`](runtime.md#start) is called. -* **`terminal`** (bool, OPTIONAL) specifies whether a terminal is attached to that process, defaults to false. - As an example, if set to true on Linux a pseudoterminal pair is allocated for the container process and the pseudoterminal slave is duplicated on the container process's [standard streams][stdin.3]. -* **`consoleSize`** (object, OPTIONAL) specifies the console size of the terminal if attached, containing the following properties: - * **`height`** (uint, REQUIRED) - * **`width`** (uint, REQUIRED) +* **`terminal`** (bool, OPTIONAL) specifies whether a terminal is attached to the process, defaults to false. + As an example, if set to true on Linux a pseudoterminal pair is allocated for the process and the pseudoterminal slave is duplicated on the process's [standard streams][stdin.3]. +* **`consoleSize`** (object, OPTIONAL) specifies the console size in characters of the terminal. + Runtimes MUST ignore `consoleSize` if `terminal` is `false` or unset. 
+ * **`height`** (uint, REQUIRED) + * **`width`** (uint, REQUIRED) * **`cwd`** (string, REQUIRED) is the working directory that will be set for the executable. - This value MUST be an absolute path. -* **`env`** (array of strings, OPTIONAL) with the same semantics as [IEEE Std 1003.1-2001's `environ`][ieee-1003.1-2001-xbd-c8.1]. -* **`args`** (array of strings, REQUIRED) with similar semantics to [IEEE Std 1003.1-2001 `execvp`'s *argv*][ieee-1003.1-2001-xsh-exec]. - This specification extends the IEEE standard in that at least one entry is REQUIRED, and that entry is used with the same semantics as `execvp`'s *file*. -* **`capabilities`** (array of strings, OPTIONAL) is an array that specifies the set of capabilities of the process(es) inside the container. Valid values are platform-specific. For example, valid values for Linux are defined in the [CAPABILITIES(7)](http://man7.org/linux/man-pages/man7/capabilities.7.html) man page. -* **`rlimits`** (array of objects, OPTIONAL) allows setting resource limits for a process inside the container. - Each entry has the following structure: - - * **`type`** (string, REQUIRED) - the platform resource being limited, for example on Linux as defined in the [SETRLIMIT(2)](http://man7.org/linux/man-pages/man2/setrlimit.2.html) man page. + This value MUST be an absolute path. +* **`env`** (array of strings, OPTIONAL) with the same semantics as [IEEE Std 1003.1-2008's `environ`][ieee-1003.1-2008-xbd-c8.1]. +* **`args`** (array of strings, REQUIRED) with similar semantics to [IEEE Std 1003.1-2008 `execvp`'s *argv*][ieee-1003.1-2008-xsh-exec]. + This specification extends the IEEE standard in that at least one entry is REQUIRED, and that entry is used with the same semantics as `execvp`'s *file*. +* **`capabilities`** (object, OPTIONAL) is an object containing arrays that specifies the sets of capabilities for the process. + Valid values are platform-specific. 
+ For example, valid values for Linux are defined in the [capabilities(7)][capabilities.7] man page, such as `CAP_CHOWN`. + Any value which cannot be mapped to a relevant kernel interface MUST cause an error. + `capabilities` contains the following properties: + * **`effective`** (array of strings, OPTIONAL) - the `effective` field is an array of effective capabilities that are kept for the process. + * **`bounding`** (array of strings, OPTIONAL) - the `bounding` field is an array of bounding capabilities that are kept for the process. + * **`inheritable`** (array of strings, OPTIONAL) - the `inheritable` field is an array of inheritable capabilities that are kept for the process. + * **`permitted`** (array of strings, OPTIONAL) - the `permitted` field is an array of permitted capabilities that are kept for the process. + * **`ambient`** (array of strings, OPTIONAL) - the `ambient` field is an array of ambient capabilities that are kept for the process. +* **`rlimits`** (array of objects, OPTIONAL) allows setting resource limits for the process. + Each entry has the following structure: + + * **`type`** (string, REQUIRED) - the platform resource being limited, for example on Linux as defined in the [setrlimit(2)][setrlimit.2] man page. * **`soft`** (uint64, REQUIRED) - the value of the limit enforced for the corresponding resource. - * **`hard`** (uint64, REQUIRED) - the ceiling for the soft limit that could be set by an unprivileged process. Only a privileged process (e.g. under Linux: one with the CAP_SYS_RESOURCE capability) can raise a hard limit. + * **`hard`** (uint64, REQUIRED) - the ceiling for the soft limit that could be set by an unprivileged process. + Only a privileged process (e.g. under Linux: one with the CAP_SYS_RESOURCE capability) can raise a hard limit. If `rlimits` contains duplicated entries with same `type`, the runtime MUST error out. 
-* **`noNewPrivileges`** (bool, OPTIONAL) setting `noNewPrivileges` to true prevents the processes in the container from gaining additional privileges. - As an example, the ['no_new_privs' kernel doc](https://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt) has more information on how this is achieved using a prctl system call on Linux. +* **`noNewPrivileges`** (bool, OPTIONAL) setting `noNewPrivileges` to true prevents the process from gaining additional privileges. + As an example, the ['no_new_privs'][no-new-privs] article in the kernel documentation has information on how this is achieved using a prctl system call on Linux. + +For Linux-based systems the process structure supports the following process-specific fields. -For Linux-based systems the process structure supports the following process specific fields. +* **`apparmorProfile`** (string, OPTIONAL) specifies the name of the AppArmor profile for the process. + For more information about AppArmor, see [AppArmor documentation][apparmor]. +* **`oomScoreAdj`** *(int, OPTIONAL)* adjusts the oom-killer score in `[pid]/oom_score_adj` for the process's `[pid]` in a [proc pseudo-filesystem][procfs_2]. + If `oomScoreAdj` is set, the runtime MUST set `oom_score_adj` to the given value. + If `oomScoreAdj` is not set, the runtime MUST NOT change the value of `oom_score_adj`. -* **`apparmorProfile`** (string, OPTIONAL) specifies the name of the AppArmor profile to be applied to processes in the container. - For more information about AppArmor, see [AppArmor documentation](https://wiki.ubuntu.com/AppArmor) -* **`selinuxLabel`** (string, OPTIONAL) specifies the SELinux label to be applied to the processes in the container. - For more information about SELinux, see [SELinux documentation](http://selinuxproject.org/page/Main_Page) + This is a per-process setting, whereas [`disableOOMKiller`](config-linux.md#disable-out-of-memory-killer) is scoped for a memory cgroup.
+ For more information on how these two settings work together, see [the memory cgroup documentation section 10. OOM Control][cgroup-v1-memory_2]. +* **`selinuxLabel`** (string, OPTIONAL) specifies the SELinux label for the process. + For more information about SELinux, see [SELinux documentation][selinux]. -### User +### User The user for the process is a platform-specific structure that allows specific control over which user the process runs as. -#### Linux and Solaris User +#### Linux and Solaris User For Linux and Solaris based systems the user structure has the following fields: -* **`uid`** (int, REQUIRED) specifies the user ID in the [container namespace][container-namespace]. -* **`gid`** (int, REQUIRED) specifies the group ID in the [container namespace][container-namespace]. -* **`additionalGids`** (array of ints, OPTIONAL) specifies additional group IDs (in the [container namespace][container-namespace]) to be added to the process. +* **`uid`** (int, REQUIRED) specifies the user ID in the [container namespace](glossary.md#container-namespace). +* **`gid`** (int, REQUIRED) specifies the group ID in the [container namespace](glossary.md#container-namespace). +* **`additionalGids`** (array of ints, OPTIONAL) specifies additional group IDs in the [container namespace](glossary.md#container-namespace) to be added to the process. _Note: symbolic name for uid and gid, such as uname and gname respectively, are left to upper levels to derive (i.e.
`/etc/passwd` parsing, NSS, etc)_ @@ -190,11 +226,30 @@ _Note: symbolic name for uid and gid, such as uname and gname respectively, are "apparmorProfile": "acme_secure_profile", "selinuxLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675", "noNewPrivileges": true, - "capabilities": [ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE" - ], + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "inheritable": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL" + ], + "ambient": [ + "CAP_NET_BIND_SERVICE" + ] + }, "rlimits": [ { "type": "RLIMIT_NOFILE", @@ -229,7 +284,7 @@ _Note: symbolic name for uid and gid, such as uname and gname respectively, are } ``` -#### Windows User +#### Windows User For Windows based systems the user structure has the following fields: @@ -254,11 +309,11 @@ For Windows based systems the user structure has the following fields: ``` -## Hostname +## Hostname * **`hostname`** (string, OPTIONAL) specifies the container's hostname as seen by processes running inside the container. - On Linux, for example, this will change the hostname in the [container][container-namespace] [UTS namespace][uts-namespace]. - Depending on your [namespace configuration](config-linux.md#namespaces), the container UTS namespace may be the [runtime UTS namespace][runtime-namespace]. + On Linux, for example, this will change the hostname in the [container](glossary.md#container-namespace) [UTS namespace][uts-namespace.7]. + Depending on your [namespace configuration](config-linux.md#namespaces), the container UTS namespace may be the [runtime](glossary.md#runtime-namespace) [UTS namespace][uts-namespace.7]. 
### Example @@ -266,99 +321,65 @@ For Windows based systems the user structure has the following fields: "hostname": "mrsdalloway" ``` -## Platform - -**`platform`** specifies the configuration's target platform. - -* **`os`** (string, REQUIRED) specifies the operating system family of the container configuration's specified [`root`](#root-configuration) file system bundle. - The runtime MUST generate an error if it does not support the specified **`os`**. - Bundles SHOULD use, and runtimes SHOULD understand, **`os`** entries listed in the Go Language document for [`$GOOS`][go-environment]. - If an operating system is not included in the `$GOOS` documentation, it SHOULD be submitted to this specification for standardization. -* **`arch`** (string, REQUIRED) specifies the instruction set for which the binaries in the specified [`root`](#root-configuration) file system bundle have been compiled. - The runtime MUST generate an error if it does not support the specified **`arch`**. - Values for **`arch`** SHOULD use, and runtimes SHOULD understand, **`arch`** entries listed in the Go Language document for [`$GOARCH`][go-environment]. - If an architecture is not included in the `$GOARCH` documentation, it SHOULD be submitted to this specification for standardization. - -### Example - -```json -"platform": { - "os": "linux", - "arch": "amd64" -} -``` - -## Platform-specific configuration - -[**`platform.os`**](#platform) is used to specify platform-specific configuration. -Runtime implementations MAY support any valid values for platform-specific fields as part of this configuration. -Implementations MUST error out when invalid values are encountered and MUST generate an error message and error out when encountering valid values it chooses to not support. +## Platform-specific configuration * **`linux`** (object, OPTIONAL) [Linux-specific configuration](config-linux.md). - This MAY be set if **`platform.os`** is `linux` and MUST NOT be set otherwise. 
+ This MAY be set if the target platform of this spec is `linux`. * **`windows`** (object, OPTIONAL) [Windows-specific configuration](config-windows.md). - This MAY be set if **`platform.os`** is `windows` and MUST NOT be set otherwise. + This MUST be set if the target platform of this spec is `windows`. * **`solaris`** (object, OPTIONAL) [Solaris-specific configuration](config-solaris.md). - This MAY be set if **`platform.os`** is `solaris` and MUST NOT be set otherwise. + This MAY be set if the target platform of this spec is `solaris`. ### Example (Linux) ```json { - "platform": { - "os": "linux", - "arch": "amd64" - }, "linux": { "namespaces": [ - { - "type": "pid" - } + { + "type": "pid" + } ] } } ``` -## Hooks +## Linux and Solaris Hooks -Hooks allow for the configuration of custom actions related to the [lifecycle](runtime.md#lifecycle) of the container. +For Linux- and Solaris-based systems, the configuration structure supports `hooks` for configuring custom actions related to the [lifecycle](runtime.md#lifecycle) of the container. * **`hooks`** (object, OPTIONAL) MAY contain any of the following properties: - * **`prestart`** (array, OPTIONAL) is an array of [pre-start hooks](#prestart). - Entries in the array contain the following properties: - * **`path`** (string, REQUIRED) with similar semantics to [IEEE Std 1003.1-2001 `execv`'s *path*][ieee-1003.1-2001-xsh-exec]. - This specification extends the IEEE standard in that **`path`** MUST be absolute. - * **`args`** (array of strings, REQUIRED) with the same semantics as [IEEE Std 1003.1-2001 `execv`'s *argv*][ieee-1003.1-2001-xsh-exec]. - * **`env`** (array of strings, OPTIONAL) with the same semantics as [IEEE Std 1003.1-2001's `environ`][ieee-1003.1-2001-xbd-c8.1]. - * **`timeout`** (int, OPTIONAL) is the number of seconds before aborting the hook. - * **`poststart`** (array, OPTIONAL) is an array of [post-start hooks](#poststart). - Entries in the array have the same schema as pre-start entries. 
- * **`poststop`** (array, OPTIONAL) is an array of [post-stop hooks](#poststop). - Entries in the array have the same schema as pre-start entries. + * **`prestart`** (array of objects, OPTIONAL) is an array of [pre-start hooks](#prestart). + Entries in the array contain the following properties: + * **`path`** (string, REQUIRED) with similar semantics to [IEEE Std 1003.1-2008 `execv`'s *path*][ieee-1003.1-2008-xsh-exec]. + This specification extends the IEEE standard in that **`path`** MUST be absolute. + * **`args`** (array of strings, OPTIONAL) with the same semantics as [IEEE Std 1003.1-2008 `execv`'s *argv*][ieee-1003.1-2008-xsh-exec]. + * **`env`** (array of strings, OPTIONAL) with the same semantics as [IEEE Std 1003.1-2008's `environ`][ieee-1003.1-2008-xbd-c8.1]. + * **`timeout`** (int, OPTIONAL) is the number of seconds before aborting the hook. + If set, `timeout` MUST be greater than zero. + * **`poststart`** (array of objects, OPTIONAL) is an array of [post-start hooks](#poststart). + Entries in the array have the same schema as pre-start entries. + * **`poststop`** (array of objects, OPTIONAL) is an array of [post-stop hooks](#poststop). + Entries in the array have the same schema as pre-start entries. Hooks allow users to specify programs to run before or after various lifecycle events. Hooks MUST be called in the listed order. The [state](runtime.md#state) of the container MUST be passed to hooks over stdin so that they may do work appropriate to the current state of the container. -### Prestart +### Prestart -The pre-start hooks MUST be called after the container has been created, but before the user supplied command is executed. +The pre-start hooks MUST be called after the [`start`](runtime.md#start) operation is called but [before the user-specified program command is executed](runtime.md#lifecycle).
On Linux, for example, they are called after the container namespaces are created, so they provide an opportunity to customize the container (e.g. the network namespace could be specified in this hook). -If a hook returns a non-zero exit code, an error including the exit code and the stderr MUST be returned to the caller and the container MUST be destroyed. - -### Poststart +### Poststart -The post-start hooks MUST be called after the user process is started. +The post-start hooks MUST be called [after the user-specified process is executed](runtime.md#lifecycle) but before the [`start`](runtime.md#start) operation returns. For example, this hook can notify the user that the container process is spawned. -If a hook returns a non-zero exit code, then an error MUST be logged and the remaining hooks are executed. +### Poststop -### Poststop - -The post-stop hooks MUST be called after the container process is stopped. +The post-stop hooks MUST be called [after the container is deleted](runtime.md#lifecycle) but before the [`delete`](runtime.md#delete) operation returns. Cleanup or debugging functions are examples of such a hook. -If a hook returns a non-zero exit code, then an error MUST be logged and the remaining hooks are executed. ### Example @@ -389,22 +410,21 @@ If a hook returns a non-zero exit code, then an error MUST be logged and the rem } ``` -## Annotations +## Annotations **`annotations`** (object, OPTIONAL) contains arbitrary metadata for the container. -This information MAY be structured or unstructured. -Annotations MUST be a key-value map. -If there are no annotations then this property MAY either be absent or an empty map. + This information MAY be structured or unstructured. + Annotations MUST be a key-value map. + If there are no annotations then this property MAY either be absent or an empty map. -Keys MUST be strings. -Keys MUST be unique within this map. -Keys MUST NOT be an empty string. 
-Keys SHOULD be named using a reverse domain notation - e.g. `com.example.myKey`. -Keys using the `org.opencontainers` namespace are reserved and MUST NOT be used by subsequent specifications. -Implementations that are reading/processing this configuration file MUST NOT generate an error if they encounter an unknown annotation key. + Keys MUST be strings. + Keys MUST NOT be an empty string. + Keys SHOULD be named using a reverse domain notation - e.g. `com.example.myKey`. + Keys using the `org.opencontainers` namespace are reserved and MUST NOT be used by subsequent specifications. + Implementations that are reading/processing this configuration file MUST NOT generate an error if they encounter an unknown annotation key. -Values MUST be strings. -Values MAY be an empty string. + Values MUST be strings. + Values MAY be an empty string. ```json "annotations": { @@ -412,9 +432,16 @@ Values MAY be an empty string. } ``` -## Extensibility -Implementations that are reading/processing this configuration file MUST NOT generate an error if they encounter an unknown property. +## Extensibility + +Runtimes that are reading or processing this configuration file MUST NOT generate an error if they encounter an unknown property. Instead they MUST ignore unknown properties. +Properties defined for the [target platform](spec.md#platforms) by the [declared version](#specification-version) of this specification MUST NOT be ignored. + +## Valid values + +Runtimes that are reading or processing this configuration file MUST generate an error when invalid or unsupported values are encountered. +Unless support for a valid value is explicitly required, runtimes MAY choose which subset of the valid values it will support. ## Configuration Schema Example @@ -423,10 +450,6 @@ Here is a full example `config.json` for reference. 
```json { "ociVersion": "0.5.0-dev", - "platform": { - "os": "linux", - "arch": "amd64" - }, "process": { "terminal": true, "user": { @@ -445,11 +468,30 @@ Here is a full example `config.json` for reference. "TERM=xterm" ], "cwd": "/", - "capabilities": [ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE" - ], + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "inheritable": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL" + ], + "ambient": [ + "CAP_NET_BIND_SERVICE" + ] + }, "rlimits": [ { "type": "RLIMIT_CORE", @@ -463,6 +505,7 @@ Here is a full example `config.json` for reference. } ], "apparmorProfile": "acme_secure_profile", + "oomScoreAdj": 100, "selinuxLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675", "noNewPrivileges": true }, @@ -642,7 +685,6 @@ Here is a full example `config.json` for reference. "limit": 9223372036854772000 } ], - "oomScoreAdj": 100, "memory": { "limit": 536870912, "reservation": 536870912, @@ -682,9 +724,9 @@ Here is a full example `config.json` for reference. } ], "blockIO": { - "blkioWeight": 10, - "blkioLeafWeight": 10, - "blkioWeightDevice": [ + "weight": 10, + "leafWeight": 10, + "weightDevice": [ { "major": 8, "minor": 0, @@ -697,14 +739,14 @@ Here is a full example `config.json` for reference. "weight": 500 } ], - "blkioThrottleReadBpsDevice": [ + "throttleReadBpsDevice": [ { "major": 8, "minor": 0, "rate": 600 } ], - "blkioThrottleWriteIOPSDevice": [ + "throttleWriteIOPSDevice": [ { "major": 8, "minor": 16, @@ -717,11 +759,15 @@ Here is a full example `config.json` for reference. 
"seccomp": { "defaultAction": "SCMP_ACT_ALLOW", "architectures": [ - "SCMP_ARCH_X86" + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" ], "syscalls": [ { - "name": "getcwd", + "names": [ + "getcwd", + "chmod" + ], "action": "SCMP_ACT_ERRNO" } ] @@ -772,13 +818,23 @@ Here is a full example `config.json` for reference. } ``` -[container-namespace]: glossary.md#container-namespace -[go-environment]: https://golang.org/doc/install/source#environment -[ieee-1003.1-2001-xbd-c8.1]: http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap08.html#tag_08_01 -[ieee-1003.1-2001-xsh-exec]: http://pubs.opengroup.org/onlinepubs/009695399/functions/exec.html -[runtime-namespace]: glossary.md#runtime-namespace -[uts-namespace]: http://man7.org/linux/man-pages/man7/namespaces.7.html + +[apparmor]: https://wiki.ubuntu.com/AppArmor +[cgroup-v1-memory_2]: https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt +[selinux]:http://selinuxproject.org/page/Main_Page +[no-new-privs]: https://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt +[procfs_2]: https://www.kernel.org/doc/Documentation/filesystems/proc.txt +[semver-v2.0.0]: http://semver.org/spec/v2.0.0.html +[ieee-1003.1-2008-xbd-c8.1]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_01 +[ieee-1003.1-2008-xsh-exec]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/exec.html +[naming-a-volume]: https://aka.ms/nb3hqb + +[capabilities.7]: http://man7.org/linux/man-pages/man7/capabilities.7.html +[mount.2]: http://man7.org/linux/man-pages/man2/mount.2.html +[mount.8]: http://man7.org/linux/man-pages/man8/mount.8.html [mount.8-filesystem-independent]: http://man7.org/linux/man-pages/man8/mount.8.html#FILESYSTEM-INDEPENDENT_MOUNT%20OPTIONS [mount.8-filesystem-specific]: http://man7.org/linux/man-pages/man8/mount.8.html#FILESYSTEM-SPECIFIC_MOUNT%20OPTIONS -[mount.8]: http://man7.org/linux/man-pages/man8/mount.8.html +[setrlimit.2]: http://man7.org/linux/man-pages/man2/setrlimit.2.html 
[stdin.3]: http://man7.org/linux/man-pages/man3/stdin.3.html +[uts-namespace.7]: http://man7.org/linux/man-pages/man7/namespaces.7.html +[zonecfg.1m]: http://docs.oracle.com/cd/E86824_01/html/E54764/zonecfg-1m.html diff --git a/glossary.md b/glossary.md index 273541149..6180db4c5 100644 --- a/glossary.md +++ b/glossary.md @@ -15,7 +15,7 @@ For example, namespaces, resource limits, and mounts are all part of the contain ## Container namespace -On Linux, a leaf in the [namespace][namespaces.7] hierarchy in which the [configured process](config.md#process) executes. +On Linux, the [namespaces][namespaces.7] in which the [configured process](config.md#process) executes. ## JSON @@ -30,9 +30,9 @@ It reads the [configuration files](#configuration) from a [bundle](#bundle), use ## Runtime namespace -On Linux, a leaf in the [namespace][namespaces.7] hierarchy from which the [runtime](#runtime) process is executed. -New container namespaces will be created as children of the runtime namespaces. +On Linux, the namespaces from which new [container namespaces](#container-namespace) are [created](config-linux.md#namespaces) and from which some configured resources are accessed. [JSON]: https://tools.ietf.org/html/rfc7159 [UTF-8]: http://www.unicode.org/versions/Unicode8.0.0/ch03.pdf + [namespaces.7]: http://man7.org/linux/man-pages/man7/namespaces.7.html diff --git a/implementations.md b/implementations.md index 218bb8c5b..8479d6d71 100644 --- a/implementations.md +++ b/implementations.md @@ -1,19 +1,27 @@ -# Implementations +# Implementations The following sections link to associated projects, some of which are maintained by the OCI and some of which are maintained by external organizations. If you know of any associated projects that are not listed here, please file a pull request adding a link to that project.
-## Runtime (Container) +## Runtime (Container) -* [opencontainers/runc](https://github.com/opencontainers/runc) - Reference implementation of OCI runtime +* [opencontainers/runc][runc] - Reference implementation of OCI runtime -## Runtime (Virtual Machine) +## Runtime (Virtual Machine) -* [hyperhq/runv](https://github.com/hyperhq/runv) - Hypervisor-based runtime for OCI -* [01org/cc-oci-runtime](https://github.com/01org/cc-oci-runtime) - Hypervisor-based OCI runtime for Intel® Architecture +* [hyperhq/runv][runv] - Hypervisor-based runtime for OCI +* [01org/cc-oci-runtime][cc-oci] - Hypervisor-based OCI runtime for Intel® Architecture -## Testing & Tools +## Testing & Tools -* [kunalkushwaha/octool](https://github.com/kunalkushwaha/octool) - A config linter and validator. -* [huawei-openlab/oct](https://github.com/huawei-openlab/oct) - Open Container Testing framework for OCI configuration and runtime -* [opencontainers/runtime-tools](https://github.com/opencontainers/runtime-tools) - A config generator and runtime/bundle testing framework. +* [kunalkushwaha/octool][octool] - A config linter and validator. +* [huawei-openlab/oct][oct] - Open Container Testing framework for OCI configuration and runtime +* [opencontainers/runtime-tools][runtime-tools] - A config generator and runtime/bundle testing framework. 
+ + +[runc]: https://github.com/opencontainers/runc +[runv]: https://github.com/hyperhq/runv +[cc-oci]: https://github.com/01org/cc-oci-runtime +[octool]: https://github.com/kunalkushwaha/octool +[oct]: https://github.com/huawei-openlab/oct +[runtime-tools]: https://github.com/opencontainers/runtime-tools diff --git a/meeting.ics b/meeting.ics new file mode 100644 index 000000000..c042a33b0 --- /dev/null +++ b/meeting.ics @@ -0,0 +1,52 @@ +BEGIN:VCALENDAR +VERSION:2.0 +PRODID:-//Open Containers Initiative//Developer Meeting//EN +BEGIN:VTIMEZONE +TZID:America/Los_Angeles +LAST-MODIFIED:20050809T050000Z +BEGIN:STANDARD +DTSTART:20071104T020000 +RRULE:FREQ=YEARLY;BYMONTH=11;BYDAY=1SU +TZOFFSETFROM:-0700 +TZOFFSETTO:-0800 +TZNAME:PST +END:STANDARD +BEGIN:DAYLIGHT +DTSTART:20070311T020000 +RRULE:FREQ=YEARLY;BYMONTH=3;BYDAY=2SU +TZOFFSETFROM:-0800 +TZOFFSETTO:-0700 +TZNAME:PDT +END:DAYLIGHT +END:VTIMEZONE +BEGIN:VEVENT +UID:tdc-meeting-1@opencontainers.org +DTSTAMP:20170405T220000Z +DTSTART;TZID=America/Los_Angeles:20170329T080000 +RRULE:FREQ=WEEKLY;INTERVAL=2;BYDAY=WE +DURATION:PT1H +SUMMARY:OCI TDC Meeting +DESCRIPTION;ALTREP="https://github.com/opencontainers/runtime-spec# + weekly-call":Open Containers Initiative Developer Meeting\n + https://github.com/opencontainers/runtime-spec#weekly-call\n + Web: https://www.uberconference.com/opencontainers\n + Audio-only: +1 415 968 0849 (no PIN needed) +LOCATION:https://www.uberconference.com/opencontainers +URL:https://github.com/opencontainers/runtime-spec/blob/master/meeting.ics +END:VEVENT +BEGIN:VEVENT +UID:tdc-meeting-2@opencontainers.org +DTSTAMP:20170517T143500Z +DTSTART;TZID=America/Los_Angeles:20170517T140000 +RRULE:FREQ=WEEKLY;INTERVAL=2;BYDAY=WE +DURATION:PT1H +SUMMARY:OCI TDC Meeting +DESCRIPTION;ALTREP="https://github.com/opencontainers/runtime-spec# + weekly-call":Open Containers Initiative Developer Meeting\n + https://github.com/opencontainers/runtime-spec#weekly-call\n + Web: 
https://www.uberconference.com/opencontainers\n + Audio-only: +1 415 968 0849 (no PIN needed) +LOCATION:https://www.uberconference.com/opencontainers +URL:https://github.com/opencontainers/runtime-spec/blob/master/meeting.ics +END:VEVENT +END:VCALENDAR diff --git a/project.md b/project.md index 2f8f0767d..3f8a09b9d 100644 --- a/project.md +++ b/project.md @@ -1,10 +1,12 @@ -# Project docs +# Project docs -## Release Process +## Release Process * Increment version in [`specs-go/version.go`](specs-go/version.go) * `git commit` version increment * `git tag` the prior commit (preferably signed tag) * `make docs` to produce PDF and HTML copies of the spec -* Make a release on [github.com/opencontainers/runtime-spec](https://github.com/opencontainers/runtime-spec/releases) for the version. Attach the produced docs. +* Make a [release][releases] for the version. Attach the produced docs. + +[releases]: https://github.com/opencontainers/runtime-spec/releases diff --git a/runtime-linux.md b/runtime-linux.md index d60418b85..16c6dbebb 100644 --- a/runtime-linux.md +++ b/runtime-linux.md @@ -3,12 +3,12 @@ ## File descriptors By default, only the `stdin`, `stdout` and `stderr` file descriptors are kept open for the application by the runtime. -The runtime MAY pass additional file descriptors to the application to support features such as [socket activation](http://0pointer.de/blog/projects/socket-activated-containers.html). +The runtime MAY pass additional file descriptors to the application to support features such as [socket activation][socket-activated-containers]. Some of the file descriptors MAY be redirected to `/dev/null` even though they are open. ## Dev symbolic links -After the container has `/proc` mounted, the following standard symlinks MUST be setup within `/dev/` for the IO. 
+While creating the container (step 2 in the [lifecycle](runtime.md#lifecycle)), runtimes MUST create the following symlinks if the source file exists after processing [`mounts`](config.md#mounts): | Source | Destination | | --------------- | ----------- | @@ -16,3 +16,6 @@ After the container has `/proc` mounted, the following standard symlinks MUST be | /proc/self/fd/0 | /dev/stdin | | /proc/self/fd/1 | /dev/stdout | | /proc/self/fd/2 | /dev/stderr | + + +[socket-activated-containers]: http://0pointer.de/blog/projects/socket-activated-containers.html diff --git a/runtime.md b/runtime.md index 54f4dc12f..b9c3b94d7 100644 --- a/runtime.md +++ b/runtime.md @@ -1,31 +1,32 @@ -# Runtime and Lifecycle +# Runtime and Lifecycle -## Scope of a Container +## Scope of a Container -Barring access control concerns, the entity using a runtime to create a container MUST be able to use the operations defined in this specification against that same container. +The entity using a runtime to create a container MUST be able to use the operations defined in this specification against that same container. Whether other entities using the same, or other, instance of the runtime can see that container is out of scope of this specification. -## State +## State The state of a container includes the following properties: * **`ociVersion`** (string, REQUIRED) is the OCI specification version used when creating the container. * **`id`** (string, REQUIRED) is the container's ID. -This MUST be unique across all containers on this host. -There is no requirement that it be unique across hosts. + This MUST be unique across all containers on this host. + There is no requirement that it be unique across hosts. * **`status`** (string, REQUIRED) is the runtime state of the container. 
-The value MAY be one of: + The value MAY be one of: - * `created`: the container process has neither exited nor executed the user-specified program - * `running`: the container process has executed the user-specified program but has not exited - * `stopped`: the container process has exited + * `creating`: the container is being created (step 2 in the [lifecycle](#lifecycle)) + * `created`: the runtime has finished the [create operation](#create) (after step 2 in the [lifecycle](#lifecycle)), and the container process has neither exited nor executed the user-specified program + * `running`: the container process has executed the user-specified program but has not exited (after step 5 in the [lifecycle](#lifecycle)) + * `stopped`: the container process has exited (step 7 in the [lifecycle](#lifecycle)) Additional values MAY be defined by the runtime, however, they MUST be used to represent new runtime states not defined above. * **`pid`** (int, REQUIRED when `status` is `created` or `running`) is the ID of the container process, as seen by the host. * **`bundle`** (string, REQUIRED) is the absolute path to the container's bundle directory. -This is provided so that consumers can find the container's configuration and root filesystem on the host. + This is provided so that consumers can find the container's configuration and root filesystem on the host. * **`annotations`** (map, OPTIONAL) contains the list of annotations associated with the container. -If no annotations were provided then this property MAY either be absent or an empty map. + If no annotations were provided then this property MAY either be absent or an empty map. The state MAY include additional properties. @@ -46,89 +47,96 @@ When serialized in JSON, the format MUST adhere to the following pattern: See [Query State](#query-state) for information on retrieving the state of a container. 
-## Lifecycle +## Lifecycle The lifecycle describes the timeline of events that happen from when a container is created to when it ceases to exist. 1. OCI compliant runtime's [`create`](runtime.md#create) command is invoked with a reference to the location of the bundle and a unique identifier. 2. The container's runtime environment MUST be created according to the configuration in [`config.json`](config.md). - If the runtime is unable to create the environment specified in the [`config.json`](config.md), it MUST generate an error. - While the resources requested in the [`config.json`](config.md) MUST be created, the user-specified program (from [`process`](config.md#process)) MUST NOT be run at this time. - Any updates to [`config.json`](config.md) after this step MUST NOT affect the container. -3. Once the container is created additional actions MAY be performed based on the features the runtime chooses to support. - However, some actions might only be available based on the current state of the container (e.g. only available while it is started). -4. Runtime's [`start`](runtime.md#start) command is invoked with the unique identifier of the container. - The runtime MUST run the user-specified program, as specified by [`process`](config.md#process). -5. The container process exits. - This MAY happen due to erroring out, exiting, crashing or the runtime's [`kill`](runtime.md#kill) operation being invoked. -6. Runtime's [`delete`](runtime.md#delete) command is invoked with the unique identifier of the container. - The container MUST be destroyed by undoing the steps performed during create phase (step 2). - -## Errors + If the runtime is unable to create the environment specified in the [`config.json`](config.md), it MUST [generate an error](#errors). + While the resources requested in the [`config.json`](config.md) MUST be created, the user-specified program (from [`process`](config.md#process)) MUST NOT be run at this time. 
+ Any updates to [`config.json`](config.md) after this step MUST NOT affect the container. +3. Runtime's [`start`](runtime.md#start) command is invoked with the unique identifier of the container. +4. The [prestart hooks](config.md#prestart) MUST be invoked by the runtime. + If any prestart hook fails, the runtime MUST [generate an error](#errors), stop the container, and continue the lifecycle at step 9. +5. The runtime MUST run the user-specified program, as specified by [`process`](config.md#process). +6. The [poststart hooks](config.md#poststart) MUST be invoked by the runtime. + If any poststart hook fails, the runtime MUST [log a warning](#warnings), but the remaining hooks and lifecycle continue as if the hook had succeeded. +7. The container process exits. + This MAY happen due to erroring out, exiting, crashing or the runtime's [`kill`](runtime.md#kill) operation being invoked. +8. Runtime's [`delete`](runtime.md#delete) command is invoked with the unique identifier of the container. +9. The container MUST be destroyed by undoing the steps performed during create phase (step 2). +10. The [poststop hooks](config.md#poststop) MUST be invoked by the runtime. + If any poststop hook fails, the runtime MUST [log a warning](#warnings), but the remaining hooks and lifecycle continue as if the hook had succeeded. + +## Errors In cases where the specified operation generates an error, this specification does not mandate how, or even if, that error is returned or exposed to the user of an implementation. Unless otherwise stated, generating an error MUST leave the state of the environment as if the operation were never attempted - modulo any possible trivial ancillary changes such as logging. -## Operations +## Warnings -OCI compliant runtimes MUST support the following operations, unless the operation is not supported by the base operating system. 
+In cases where the specified operation logs a warning, this specification does not mandate how, or even if, that warning is returned or exposed to the user of an implementation. +Unless otherwise stated, logging a warning does not change the flow of the operation; it MUST continue as if the warning had not been logged. + +## Operations + +Unless otherwise stated, runtimes MUST support the following operations. Note: these operations are not specifying any command-line APIs, and the parameters are inputs for general operations. -### Query State +### Query State `state <container-id>` -This operation MUST generate an error if it is not provided the ID of a container. -Attempting to query a container that does not exist MUST generate an error. +This operation MUST [generate an error](#errors) if it is not provided the ID of a container. +Attempting to query a container that does not exist MUST [generate an error](#errors). This operation MUST return the state of a container as specified in the [State](#state) section. -### Create +### Create `create <container-id> <path-to-bundle>` -This operation MUST generate an error if it is not provided a path to the bundle and the container ID to associate with the container. -If the ID provided is not unique across all containers within the scope of the runtime, or is not valid in any other way, the implementation MUST generate an error and a new container MUST NOT be created. -Using the data in [`config.json`](config.md), this operation MUST create a new container. -This means that all of the resources associated with the container MUST be created, however, the user-specified program MUST NOT be run at this time. -If the runtime cannot create the container as specified in [`config.json`](config.md), it MUST generate an error and a new container MUST NOT be created. +This operation MUST [generate an error](#errors) if it is not provided a path to the bundle and the container ID to associate with the container.
+If the ID provided is not unique across all containers within the scope of the runtime, or is not valid in any other way, the implementation MUST [generate an error](#errors) and a new container MUST NOT be created. +This operation MUST create a new container. -Upon successful completion of this operation the `status` property of this container MUST be `created`. +All of the properties configured in [`config.json`](config.md) except for [`process`](config.md#process) MUST be applied. +[`process.args`](config.md#process) MUST NOT be applied until triggered by the [`start`](#start) operation. +The remaining `process` properties MAY be applied by this operation. +If the runtime cannot apply a property as specified in the [configuration](config.md), it MUST [generate an error](#errors) and a new container MUST NOT be created. The runtime MAY validate `config.json` against this spec, either generically or with respect to the local system capabilities, before creating the container ([step 2](#lifecycle)). Runtime callers who are interested in pre-create validation can run [bundle-validation tools](implementations.md#testing--tools) before invoking the create operation. Any changes made to the [`config.json`](config.md) file after this operation will not have an effect on the container. -### Start +### Start `start <container-id>` -This operation MUST generate an error if it is not provided the container ID. -Attempting to start a container that does not exist MUST generate an error. -Attempting to start an already started container MUST have no effect on the container and MUST generate an error. +This operation MUST [generate an error](#errors) if it is not provided the container ID. +Attempting to start a container that does not exist MUST [generate an error](#errors). +Attempting to start an already started container MUST have no effect on the container and MUST [generate an error](#errors).
This operation MUST run the user-specified program as specified by [`process`](config.md#process). +This operation MUST [generate an error](#errors) if `process` was not set. -Upon successful completion of this operation the `status` property of this container MUST be `running`. - -### Kill +### Kill `kill <container-id> <signal>` -This operation MUST generate an error if it is not provided the container ID. -Attempting to send a signal to a container that is not running MUST have no effect on the container and MUST generate an error. +This operation MUST [generate an error](#errors) if it is not provided the container ID. +Attempting to send a signal to a container that is not running MUST have no effect on the container and MUST [generate an error](#errors). This operation MUST send the specified signal to the process in the container. -When the process in the container is stopped, irrespective of it being as a result of a `kill` operation or any other reason, the `status` property of this container MUST be `stopped`. - -### Delete +### Delete `delete <container-id>` -This operation MUST generate an error if it is not provided the container ID. -Attempting to delete a container that does not exist MUST generate an error. -Attempting to delete a container whose process is still running MUST generate an error. +This operation MUST [generate an error](#errors) if it is not provided the container ID. +Attempting to delete a container that does not exist MUST [generate an error](#errors). +Attempting to delete a container whose process is still running MUST [generate an error](#errors). Deleting a container MUST delete the resources that were created during the `create` step. Note that resources associated with the container, but not created by this container, MUST NOT be deleted. Once a container is deleted its ID MAY be used by a subsequent container.
-## Hooks +## Hooks Many of the operations specified in this specification have "hooks" that allow for additional actions to be taken before or after each operation. See [runtime configuration for hooks](./config.md#hooks) for more information. diff --git a/schema/Makefile b/schema/Makefile index 3016fb889..620cb6849 100644 --- a/schema/Makefile +++ b/schema/Makefile @@ -1,16 +1,50 @@ +GOOD_TESTS = $(wildcard test/good/*.json) +BAD_TESTS = $(wildcard test/bad/*.json) -default: help +default: validate help: - @echo "Usage: make " + @echo "Usage: make [target]" @echo @echo " * 'fmt' - format the json with indentation" + @echo " * 'help' - show this help information" @echo " * 'validate' - build the validation tool" fmt: - for i in *.json ; do jq --indent 4 -M . "$${i}" > xx && cat xx > "$${i}" && rm xx ; done + find . -name '*.json' -exec bash -c 'jq --indent 4 -M . {} > xx && mv xx {} || echo "skipping invalid {}"' \; +.PHONY: validate validate: validate.go go get -d ./... go build ./validate.go +test: validate $(TESTS) + for TYPE in $$(ls test); \ + do \ + echo "testing $${TYPE}"; \ + for FILE in $$(ls "test/$${TYPE}/good"); \ + do \ + echo " testing test/$${TYPE}/good/$${FILE}"; \ + if ./validate "$${TYPE}-schema.json" "test/$${TYPE}/good/$${FILE}" ; \ + then \ + echo " received expected validation success" ; \ + else \ + echo " received unexpected validation failure" ; \ + exit 1; \ + fi \ + done; \ + for FILE in $$(ls "test/$${TYPE}/bad"); \ + do \ + echo " testing test/$${TYPE}/bad/$${FILE}"; \ + if ./validate "$${TYPE}-schema.json" "test/$${TYPE}/bad/$${FILE}" ; \ + then \ + echo " received unexpected validation success" ; \ + exit 1; \ + else \ + echo " received expected validation failure" ; \ + fi \ + done; \ + done + +clean: + rm -f validate diff --git a/schema/config-linux.json b/schema/config-linux.json index 17cc72dcc..77f8867d9 100644 --- a/schema/config-linux.json +++ b/schema/config-linux.json @@ -47,12 +47,6 @@ "$ref":
"defs-linux.json#/definitions/DeviceCgroup" } }, - "oomScoreAdj": { - "id": "https://opencontainers.org/schema/bundle/linux/resources/oomScoreAdj", - "type": "integer", - "minimum": -1000, - "maximum": 1000 - }, "pids": { "id": "https://opencontainers.org/schema/bundle/linux/resources/pids", "type": "object", @@ -70,44 +64,44 @@ "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO", "type": "object", "properties": { - "blkioWeight": { - "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/blkioWeight", - "$ref": "defs-linux.json#/definitions/blkioWeight" + "weight": { + "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/weight", + "$ref": "defs-linux.json#/definitions/weight" }, - "blkioLeafWeight": { - "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/blkioLeafWeight", - "$ref": "defs-linux.json#/definitions/blkioWeight" + "leafWeight": { + "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/leafWeight", + "$ref": "defs-linux.json#/definitions/weight" }, - "blkioThrottleReadBpsDevice": { - "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/blkioThrottleReadBpsDevice", + "throttleReadBpsDevice": { + "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/throttleReadBpsDevice", "type": "array", "items": { "$ref": "defs-linux.json#/definitions/blockIODeviceThrottle" } }, - "blkioThrottleWriteBpsDevice": { - "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/blkioThrottleWriteBpsDevice", + "throttleWriteBpsDevice": { + "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/throttleWriteBpsDevice", "type": "array", "items": { "$ref": "defs-linux.json#/definitions/blockIODeviceThrottle" } }, - "blkioThrottleReadIopsDevice": { - "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/blkioThrottleReadIopsDevice", + "throttleReadIopsDevice": { + "id": 
"https://opencontainers.org/schema/bundle/linux/resources/blockIO/throttleReadIopsDevice", "type": "array", "items": { "$ref": "defs-linux.json#/definitions/blockIODeviceThrottle" } }, - "blkioThrottleWriteIopsDevice": { - "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/blkioThrottleWriteIopsDevice", + "throttleWriteIopsDevice": { + "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/throttleWriteIopsDevice", "type": "array", "items": { "$ref": "defs-linux.json#/definitions/blockIODeviceThrottle" } }, - "blkioWeightDevice": { - "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/blkioWeightDevice", + "weightDevice": { + "id": "https://opencontainers.org/schema/bundle/linux/resources/blockIO/weightDevice", "type": "array", "items": { "$ref": "defs-linux.json#/definitions/blockIODeviceWeight" @@ -121,11 +115,11 @@ "properties": { "cpus": { "id": "https://opencontainers.org/schema/bundle/linux/resources/cpu/cpus", - "$ref": "string" + "type": "string" }, "mems": { "id": "https://opencontainers.org/schema/bundle/linux/resources/cpu/mems", - "$ref": "string" + "type": "string" }, "period": { "id": "https://opencontainers.org/schema/bundle/linux/resources/cpu/period", @@ -163,7 +157,7 @@ "type": "string" }, "limit": { - "$ref": "defs.json#/definitions/int64" + "$ref": "defs.json#/definitions/uint64" } }, "required": [ @@ -178,23 +172,23 @@ "properties": { "kernel": { "id": "https://opencontainers.org/schema/bundle/linux/resources/memory/kernel", - "$ref": "defs.json#/definitions/int64" + "$ref": "defs.json#/definitions/uint64" }, "kernelTCP": { "id": "https://opencontainers.org/schema/bundle/linux/resources/memory/kernelTCP", - "$ref": "defs.json#/definitions/int64" + "$ref": "defs.json#/definitions/uint64" }, "limit": { "id": "https://opencontainers.org/schema/bundle/linux/resources/memory/limit", - "$ref": "defs.json#/definitions/int64" + "$ref": "defs.json#/definitions/uint64" }, "reservation": { "id": 
"https://opencontainers.org/schema/bundle/linux/resources/memory/reservation", - "$ref": "defs.json#/definitions/int64" + "$ref": "defs.json#/definitions/uint64" }, "swap": { "id": "https://opencontainers.org/schema/bundle/linux/resources/memory/swap", - "$ref": "defs.json#/definitions/int64" + "$ref": "defs.json#/definitions/uint64" }, "swappiness": { "id": "https://opencontainers.org/schema/bundle/linux/resources/memory/swappiness", @@ -223,11 +217,11 @@ }, "cgroupsPath": { "id": "https://opencontainers.org/schema/bundle/linux/cgroupsPath", - "$ref": "string" + "type": "string" }, "rootfsPropagation": { "id": "https://opencontainers.org/schema/bundle/linux/rootfsPropagation", - "type": "string" + "$ref": "defs-linux.json#/definitions/RootfsPropagation" }, "seccomp": { "id": "https://opencontainers.org/schema/bundle/linux/seccomp", @@ -251,7 +245,10 @@ "$ref": "defs-linux.json#/definitions/Syscall" } } - } + }, + "required": [ + "defaultAction" + ] }, "sysctl": { "id": "https://opencontainers.org/schema/bundle/linux/sysctl", diff --git a/schema/config-schema.json b/schema/config-schema.json index db2146cb5..2ba139d95 100644 --- a/schema/config-schema.json +++ b/schema/config-schema.json @@ -37,31 +37,10 @@ "$ref": "defs.json#/definitions/Mount" } }, - "platform": { - "id": "https://opencontainers.org/schema/bundle/platform", - "type": "object", - "required": [ - "arch", - "os" - ], - "properties": { - "arch": { - "id": "https://opencontainers.org/schema/bundle/platform/arch", - "type": "string" - }, - "os": { - "id": "https://opencontainers.org/schema/bundle/platform/os", - "type": "string" - } - } - }, "root": { "description": "Configures the container's root filesystem.", "id": "https://opencontainers.org/schema/bundle/root", "type": "object", - "required": [ - "path" - ], "properties": { "path": { "id": "https://opencontainers.org/schema/bundle/root/path", @@ -95,11 +74,11 @@ "properties": { "height": { "id": 
"https://opencontainers.org/schema/bundle/process/consoleSize/height", - "$ref": "defs.json#/definitions/unit64" + "$ref": "defs.json#/definitions/uint64" }, "width": { "id": "https://opencontainers.org/schema/bundle/process/consoleSize/width", - "$ref": "defs.json#/definitions/unit64" + "$ref": "defs.json#/definitions/uint64" } } }, @@ -130,20 +109,62 @@ "additionalGids": { "id": "https://opencontainers.org/schema/bundle/process/user/additionalGids", "$ref": "defs.json#/definitions/ArrayOfGIDs" + }, + "username": { + "id": "https://opencontainers.org/schema/bundle/process/user/username", + "type": "string" } } }, "capabilities": { "id": "https://opencontainers.org/schema/bundle/process/linux/capabilities", - "type": "array", - "items": { - "$ref": "defs-linux.json#/definitions/Capability" + "type": "object", + "properties": { + "bounding": { + "id": "https://opencontainers.org/schema/bundle/process/linux/capabilities/bounding", + "type": "array", + "items": { + "type": "string" + } + }, + "permitted": { + "id": "https://opencontainers.org/schema/bundle/process/linux/capabilities/permitted", + "type": "array", + "items": { + "type": "string" + } + }, + "effective": { + "id": "https://opencontainers.org/schema/bundle/process/linux/capabilities/effective", + "type": "array", + "items": { + "type": "string" + } + }, + "inheritable": { + "id": "https://opencontainers.org/schema/bundle/process/linux/capabilities/inheritable", + "type": "array", + "items": { + "type": "string" + } + }, + "ambient": { + "id": "https://opencontainers.org/schema/bundle/process/linux/capabilities/ambient", + "type": "array", + "items": { + "type": "string" + } + } } }, "apparmorProfile": { "id": "https://opencontainers.org/schema/bundle/process/linux/apparmorProfile", "type": "string" }, + "oomScoreAdj": { + "id": "https://opencontainers.org/schema/bundle/process/linux/oomScoreAdj", + "type": "integer" + }, "selinuxLabel": { "id": 
"https://opencontainers.org/schema/bundle/process/linux/selinuxLabel", "type": "string" @@ -158,6 +179,11 @@ "items": { "id": "https://opencontainers.org/schema/bundle/linux/rlimits/0", "type": "object", + "required": [ + "type", + "soft", + "hard" + ], "properties": { "hard": { "id": "https://opencontainers.org/schema/bundle/linux/rlimits/0/hard", @@ -189,9 +215,6 @@ }, "required": [ "ociVersion", - "platform", - "process", - "root", - "mounts" + "root" ] } diff --git a/schema/config-windows.json b/schema/config-windows.json index 38f7d6045..a107b9206 100644 --- a/schema/config-windows.json +++ b/schema/config-windows.json @@ -4,6 +4,14 @@ "id": "https://opencontainers.org/schema/bundle/windows", "type": "object", "properties": { + "layerFolders": { + "id": "https://opencontainers.org/schema/bundle/windows/layerFolders", + "type": "array", + "items": { + "$ref": "defs.json#/definitions/FilePath" + }, + "minItems": 1 + }, "resources": { "id": "https://opencontainers.org/schema/bundle/windows/resources", "type": "object", @@ -15,10 +23,6 @@ "limit": { "id": "https://opencontainers.org/schema/bundle/windows/resources/memory/limit", "$ref": "defs.json#/definitions/uint64" - }, - "reservation": { - "id": "https://opencontainers.org/schema/bundle/windows/resources/memory/reservation", - "$ref": "defs.json#/definitions/uint64" } } }, @@ -32,11 +36,11 @@ }, "shares": { "id": "https://opencontainers.org/schema/bundle/windows/resources/cpu/shares", - "$ref": "defs-windows.json#/definitions/cpuShares" + "$ref": "defs.json#/definitions/uint16" }, - "percent": { - "id": "https://opencontainers.org/schema/bundle/windows/resources/cpu/percent", - "$ref": "defs.json#/definitions/percent" + "maximum": { + "id": "https://opencontainers.org/schema/bundle/windows/resources/cpu/maximum", + "$ref": "defs.json#/definitions/uint16" } } }, @@ -57,19 +61,56 @@ "$ref": "defs.json#/definitions/uint64" } } + } + } + }, + "network": { + "id": 
"https://opencontainers.org/schema/bundle/windows/network", + "type": "object", + "properties": { + "endpointList": { + "id": "https://opencontainers.org/schema/bundle/windows/network/endpointList", + "$ref": "defs.json#/definitions/ArrayOfStrings" }, - "network": { - "id": "https://opencontainers.org/schema/bundle/windows/resources/network", - "type": "object", - "properties": { - "egressBandwidth": { - "id": "https://opencontainers.org/schema/bundle/windows/resources/network/egressBandwidth", - "$ref": "defs.json#/definitions/uint64" - } - } + "allowUnqualifiedDNSQuery": { + "id": "https://opencontainers.org/schema/bundle/windows/network/allowUnqualifiedDNSQuery", + "type": "boolean" + }, + "DNSSearchList": { + "id": "https://opencontainers.org/schema/bundle/windows/network/DNSSearchList", + "$ref": "defs.json#/definitions/ArrayOfStrings" + }, + "networkSharedContainerName": { + "id": "https://opencontainers.org/schema/bundle/windows/network/networkSharedContainerName", + "type": "string" + } + } + }, + "credentialSpec": { + "id": "https://opencontainers.org/schema/bundle/windows/credentialSpec", + "type": "object" + }, + "servicing": { + "id": "https://opencontainers.org/schema/bundle/windows/servicing", + "type": "boolean" + }, + "ignoreFlushesDuringBoot": { + "id": "https://opencontainers.org/schema/bundle/windows/ignoreFlushesDuringBoot", + "type": "boolean" + }, + "hyperv": { + "id": "https://opencontainers.org/schema/bundle/windows/hyperv", + "type": "object", + "properties": { + "utilityVMPath": { + "id": "https://opencontainers.org/schema/bundle/windows/hyperv/utilityVMPath", + "type": "string" } } } - } + }, + "required": [ + "layerFolders" + ] } -} +} \ No newline at end of file diff --git a/schema/defs-linux.json b/schema/defs-linux.json index 1611a0c6e..241fad78d 100644 --- a/schema/defs-linux.json +++ b/schema/defs-linux.json @@ -1,5 +1,14 @@ { "definitions": { + "RootfsPropagation": { + "type": "string", + "enum": [ + "private", + "shared", + 
"slave", + "unbindable" + ] + }, "SeccompArch": { "type": "string", "enum": [ @@ -18,7 +27,9 @@ "SCMP_ARCH_PPC64", "SCMP_ARCH_PPC64LE", "SCMP_ARCH_S390", - "SCMP_ARCH_S390X" + "SCMP_ARCH_S390X", + "SCMP_ARCH_PARISC", + "SCMP_ARCH_PARISC64" ] }, "SeccompAction": { @@ -63,8 +74,12 @@ "Syscall": { "type": "object", "properties": { - "name": { - "type": "string" + "names": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 }, "action": { "$ref": "#/definitions/SeccompAction" @@ -75,12 +90,10 @@ "$ref": "#/definitions/SyscallArg" } } - } - }, - "Capability": { - "description": "Linux process permissions", - "type": "string", - "pattern": "^CAP_([A-Z]|_)+$" + }, + "required": [ + "names" + ] }, "Major": { "description": "major device number", @@ -105,9 +118,7 @@ "type": "object", "required": [ "type", - "path", - "major", - "minor" + "path" ], "properties": { "type": { @@ -133,10 +144,8 @@ } } }, - "blkioWeight": { - "type": "integer", - "minimum": 10, - "maximum": 1000 + "weight": { + "type": "integer" }, "blockIODevice": { "type": "object", @@ -163,10 +172,10 @@ "type": "object", "properties": { "weight": { - "$ref": "#/definitions/blkioWeight" + "$ref": "#/definitions/weight" }, "leafWeight": { - "$ref": "#/definitions/blkioWeight" + "$ref": "#/definitions/weight" } } } @@ -194,7 +203,7 @@ "type": "boolean" }, "type": { - "$ref": "string" + "type": "string" }, "major": { "$ref": "#/definitions/Major" @@ -203,7 +212,7 @@ "$ref": "#/definitions/Minor" }, "access": { - "$ref": "string" + "type": "string" } }, "required": [ diff --git a/schema/defs-windows.json b/schema/defs-windows.json deleted file mode 100644 index 6296da0a6..000000000 --- a/schema/defs-windows.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "definitions": { - "cpuShares": { - "description": "Relative weight to other containers with CPU Shares defined", - "type": "integer", - "minimum": 1, - "maximum": 10000 - } - } -} diff --git a/schema/defs.json b/schema/defs.json index 
14e3e66c6..b8161ac87 100644 --- a/schema/defs.json +++ b/schema/defs.json @@ -91,11 +91,11 @@ "$ref": "#/definitions/Env" }, "timeout": { - "$ref": "#/definitions/int" + "type": "integer", + "minimum": 1 } }, "required": [ - "args", "path" ] }, @@ -141,9 +141,7 @@ } }, "required": [ - "destination", - "source", - "type" + "destination" ] }, "ociVersion": { diff --git a/schema/state-schema.json b/schema/state-schema.json index 5ea3bf44f..74544980f 100644 --- a/schema/state-schema.json +++ b/schema/state-schema.json @@ -17,6 +17,7 @@ "id": "https://opencontainers.org/schema/runtime/state/status", "type": "string", "enum": [ + "creating", "created", "running", "stopped" diff --git a/schema/test/config/bad/invalid-json.json b/schema/test/config/bad/invalid-json.json new file mode 100644 index 000000000..8e9352830 --- /dev/null +++ b/schema/test/config/bad/invalid-json.json @@ -0,0 +1 @@ +{] diff --git a/schema/test/config/good/minimal-for-start.json b/schema/test/config/good/minimal-for-start.json new file mode 100644 index 000000000..766d2d380 --- /dev/null +++ b/schema/test/config/good/minimal-for-start.json @@ -0,0 +1,16 @@ +{ + "ociVersion": "1.0.0", + "root": { + "path": "rootfs" + }, + "process": { + "cwd": "/", + "args": [ + "sh" + ], + "user": { + "uid": 0, + "gid": 0 + } + } +} diff --git a/schema/test/config/good/minimal.json b/schema/test/config/good/minimal.json new file mode 100644 index 000000000..e26e9d746 --- /dev/null +++ b/schema/test/config/good/minimal.json @@ -0,0 +1,6 @@ +{ + "ociVersion": "1.0.0", + "root": { + "path": "rootfs" + } +} diff --git a/schema/test/config/good/spec-example.json b/schema/test/config/good/spec-example.json new file mode 100644 index 000000000..d6e31201f --- /dev/null +++ b/schema/test/config/good/spec-example.json @@ -0,0 +1,368 @@ +{ + "ociVersion": "0.5.0-dev", + "process": { + "terminal": true, + "user": { + "uid": 1, + "gid": 1, + "additionalGids": [ + 5, + 6 + ] + }, + "args": [ + "sh" + ], + "env": [ + 
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/", + "capabilities": { + "bounding": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "permitted": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "inheritable": [ + "CAP_AUDIT_WRITE", + "CAP_KILL", + "CAP_NET_BIND_SERVICE" + ], + "effective": [ + "CAP_AUDIT_WRITE", + "CAP_KILL" + ], + "ambient": [ + "CAP_NET_BIND_SERVICE" + ] + }, + "rlimits": [ + { + "type": "RLIMIT_CORE", + "hard": 1024, + "soft": 1024 + }, + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ], + "apparmorProfile": "acme_secure_profile", + "selinuxLabel": "system_u:system_r:svirt_lxc_net_t:s0:c124,c675", + "noNewPrivileges": true + }, + "root": { + "path": "rootfs", + "readonly": true + }, + "hostname": "slartibartfast", + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev", + "type": "tmpfs", + "source": "tmpfs", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ] + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + }, + { + "destination": "/dev/shm", + "type": "tmpfs", + "source": "shm", + "options": [ + "nosuid", + "noexec", + "nodev", + "mode=1777", + "size=65536k" + ] + }, + { + "destination": "/dev/mqueue", + "type": "mqueue", + "source": "mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys", + "type": "sysfs", + "source": "sysfs", + "options": [ + "nosuid", + "noexec", + "nodev" + ] + }, + { + "destination": "/sys/fs/cgroup", + "type": "cgroup", + "source": "cgroup", + "options": [ + "nosuid", + "noexec", + "nodev", + "relatime", + "ro" + ] + } + ], + "hooks": { + "prestart": [ + { + "path": "/usr/bin/fix-mounts", + "args": [ + "fix-mounts", + "arg1", + "arg2" + ], + "env": [ + 
"key1=value1" + ] + }, + { + "path": "/usr/bin/setup-network" + } + ], + "poststart": [ + { + "path": "/usr/bin/notify-start", + "timeout": 5 + } + ], + "poststop": [ + { + "path": "/usr/sbin/cleanup.sh", + "args": [ + "cleanup.sh", + "-f" + ] + } + ] + }, + "linux": { + "devices": [ + { + "path": "/dev/fuse", + "type": "c", + "major": 10, + "minor": 229, + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/sda", + "type": "b", + "major": 8, + "minor": 0, + "fileMode": 432, + "uid": 0, + "gid": 0 + } + ], + "uidMappings": [ + { + "hostID": 1000, + "containerID": 0, + "size": 32000 + } + ], + "gidMappings": [ + { + "hostID": 1000, + "containerID": 0, + "size": 32000 + } + ], + "sysctl": { + "net.ipv4.ip_forward": "1", + "net.core.somaxconn": "256" + }, + "cgroupsPath": "/myRuntime/myContainer", + "resources": { + "network": { + "classID": 1048577, + "priorities": [ + { + "name": "eth0", + "priority": 500 + }, + { + "name": "eth1", + "priority": 1000 + } + ] + }, + "pids": { + "limit": 32771 + }, + "hugepageLimits": [ + { + "pageSize": "2MB", + "limit": 9223372036854772000 + } + ], + "oomScoreAdj": 100, + "memory": { + "limit": 536870912, + "reservation": 536870912, + "swap": 536870912, + "kernel": 0, + "kernelTCP": 0, + "swappiness": 0 + }, + "cpu": { + "shares": 1024, + "quota": 1000000, + "period": 500000, + "realtimeRuntime": 950000, + "realtimePeriod": 1000000, + "cpus": "2-3", + "mems": "0-7" + }, + "disableOOMKiller": false, + "devices": [ + { + "allow": false, + "access": "rwm" + }, + { + "allow": true, + "type": "c", + "major": 10, + "minor": 229, + "access": "rw" + }, + { + "allow": true, + "type": "b", + "major": 8, + "minor": 0, + "access": "r" + } + ], + "blockIO": { + "weight": 10, + "leafWeight": 10, + "weightDevice": [ + { + "major": 8, + "minor": 0, + "weight": 500, + "leafWeight": 300 + }, + { + "major": 8, + "minor": 16, + "weight": 500 + } + ], + "throttleReadBpsDevice": [ + { + "major": 8, + "minor": 0, + "rate": 600 + } + ], + 
"throttleWriteIOPSDevice": [ + { + "major": 8, + "minor": 16, + "rate": 300 + } + ] + } + }, + "rootfsPropagation": "slave", + "seccomp": { + "defaultAction": "SCMP_ACT_ALLOW", + "architectures": [ + "SCMP_ARCH_X86", + "SCMP_ARCH_X32" + ], + "syscalls": [ + { + "names": [ + "getcwd", + "chmod" + ], + "action": "SCMP_ACT_ERRNO" + } + ] + }, + "namespaces": [ + { + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + }, + { + "type": "user" + }, + { + "type": "cgroup" + } + ], + "maskedPaths": [ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_stats", + "/proc/sched_debug" + ], + "readonlyPaths": [ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ], + "mountLabel": "system_u:object_r:svirt_sandbox_file_t:s0:c715,c811" + }, + "annotations": { + "com.example.key1": "value1", + "com.example.key2": "value2" + } +} diff --git a/schema/test/state/bad/invalid-json.json b/schema/test/state/bad/invalid-json.json new file mode 100644 index 000000000..8e9352830 --- /dev/null +++ b/schema/test/state/bad/invalid-json.json @@ -0,0 +1 @@ +{] diff --git a/schema/test/state/good/spec-example.json b/schema/test/state/good/spec-example.json new file mode 100644 index 000000000..a49faee56 --- /dev/null +++ b/schema/test/state/good/spec-example.json @@ -0,0 +1,10 @@ +{ + "ociVersion": "0.2.0", + "id": "oci-container1", + "status": "running", + "pid": 4422, + "bundle": "/containers/redis", + "annotations": { + "myKey": "myValue" + } +} diff --git a/schema/validate.go b/schema/validate.go index 48dd67e1a..847d20540 100644 --- a/schema/validate.go +++ b/schema/validate.go @@ -10,30 +10,53 @@ import ( "github.com/xeipuuv/gojsonschema" ) +const usage = `Validate is used to check document with specified schema. 
+You can use validate in following ways: + + 1.specify document file as an argument + validate + + 2.pass document content through a pipe + cat | validate + + 3.input document content manually, ended with ctrl+d(or your self-defined EOF keys) + validate + [INPUT DOCUMENT CONTENT HERE] +` + func main() { nargs := len(os.Args[1:]) if nargs == 0 || nargs > 2 { - fmt.Printf("ERROR: usage is: %s []\n", os.Args[0]) + fmt.Printf("ERROR: invalid arguments number\n\n%s\n", usage) + os.Exit(1) + } + + if os.Args[1] == "help" || + os.Args[1] == "--help" || + os.Args[1] == "-h" { + fmt.Printf("%s\n", usage) os.Exit(1) } schemaPath := os.Args[1] if !strings.Contains(schemaPath, "://") { - schemaPath, err := filepath.Abs(schemaPath) + var err error + schemaPath, err = formatFilePath(schemaPath) if err != nil { - fmt.Println(err) + fmt.Printf("ERROR: invalid schema-file path: %s\n", err) os.Exit(1) } schemaPath = "file://" + schemaPath } + schemaLoader := gojsonschema.NewReferenceLoader(schemaPath) var documentLoader gojsonschema.JSONLoader if nargs > 1 { - documentPath, err := filepath.Abs(os.Args[2]) + documentPath, err := formatFilePath(os.Args[2]) if err != nil { - fmt.Println(err) + fmt.Printf("ERROR: invalid document-file path: %s\n", err) os.Exit(1) } documentLoader = gojsonschema.NewReferenceLoader("file://" + documentPath) @@ -49,7 +72,8 @@ func main() { result, err := gojsonschema.Validate(schemaLoader, documentLoader) if err != nil { - panic(err.Error()) + fmt.Println(err) + os.Exit(1) } if result.Valid() { @@ -62,3 +86,15 @@ func main() { os.Exit(1) } } + +func formatFilePath(path string) (string, error) { + if _, err := os.Stat(path); err != nil { + return "", err + } + + absPath, err := filepath.Abs(path) + if err != nil { + return "", err + } + return absPath, nil +} diff --git a/spec.md b/spec.md index 750a9b133..3324eadc2 100644 --- a/spec.md +++ b/spec.md @@ -1,13 +1,23 @@ -# Open Container Initiative Runtime Specification +# Open Container Initiative Runtime 
Specification -The [Open Container Initiative](http://www.opencontainers.org/) develops specifications for standards on Operating System process and application containers. +The [Open Container Initiative][oci] develops specifications for standards on Operating System process and application containers. -Protocols defined by this specification are: -* Linux containers: [runtime.md](runtime.md), [config.md](config.md), [config-linux.md](config-linux.md), and [runtime-linux.md](runtime-linux.md). -* Solaris containers: [runtime.md](runtime.md), [config.md](config.md), and [config-solaris.md](config-solaris.md). -* Windows containers: [runtime.md](runtime.md), [config.md](config.md), and [config-windows.md](config-windows.md). +# Abstract -# Table of Contents +The OCI Runtime Specification aims to specify the configuration, execution environment, and lifecycle of a container. + +A container's configuration is specified as the `config.json` for the supported platforms and details the fields that enable the creation of a container. +The execution environment is specified to ensure that applications running inside a container have a consistent environment between runtimes along with common actions defined for the container's lifecycle. + +# Platforms + +Platforms defined by this specification are: + +* `linux`: [runtime.md](runtime.md), [config.md](config.md), [config-linux.md](config-linux.md), and [runtime-linux.md](runtime-linux.md). +* `solaris`: [runtime.md](runtime.md), [config.md](config.md), and [config-solaris.md](config-solaris.md). +* `windows`: [runtime.md](runtime.md), [config.md](config.md), and [config-windows.md](config-windows.md). 
+ +# Table of Contents - [Introduction](spec.md) - [Notational Conventions](#notational-conventions) @@ -21,14 +31,16 @@ Protocols defined by this specification are: - [Windows-specific Configuration](config-windows.md) - [Glossary](glossary.md) -# Notational Conventions +# Notational Conventions -The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" are to be interpreted as described in [RFC 2119][rfc2119]. +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONAL" are to be interpreted as described in [RFC 2119][rfc2119]. The key words "unspecified", "undefined", and "implementation-defined" are to be interpreted as described in the [rationale for the C99 standard][c99-unspecified]. -An implementation is not compliant for a given CPU architecture if it fails to satisfy one or more of the MUST, REQUIRED, or SHALL requirements for the protocols it implements. -An implementation is compliant for a given CPU architecture if it satisfies all the MUST, REQUIRED, and SHALL requirements for the protocols it implements. +An implementation is not compliant for a given CPU architecture if it fails to satisfy one or more of the MUST, REQUIRED, or SHALL requirements for the [platforms](#platforms) it implements. +An implementation is compliant for a given CPU architecture if it satisfies all the MUST, REQUIRED, and SHALL requirements for the [platforms](#platforms) it implements. 
+ [c99-unspecified]: http://www.open-std.org/jtc1/sc22/wg14/www/C99RationaleV5.10.pdf#page=18 +[oci]: http://www.opencontainers.org [rfc2119]: http://tools.ietf.org/html/rfc2119 diff --git a/specs-go/config.go b/specs-go/config.go index 1660b776f..01c70b447 100644 --- a/specs-go/config.go +++ b/specs-go/config.go @@ -6,10 +6,8 @@ import "os" type Spec struct { // Version of the Open Container Runtime Specification with which the bundle complies. Version string `json:"ociVersion"` - // Platform specifies the configuration's target platform. - Platform Platform `json:"platform"` // Process configures the container process. - Process Process `json:"process"` + Process *Process `json:"process,omitempty"` // Root configures the container's root filesystem. Root Root `json:"root"` // Hostname configures the container's hostname. @@ -17,15 +15,15 @@ type Spec struct { // Mounts configures additional mounts (on top of Root). Mounts []Mount `json:"mounts,omitempty"` // Hooks configures callbacks for container lifecycle events. - Hooks *Hooks `json:"hooks,omitempty"` + Hooks *Hooks `json:"hooks,omitempty" platform:"linux,solaris"` // Annotations contains arbitrary metadata for the container. Annotations map[string]string `json:"annotations,omitempty"` - // Linux is platform specific configuration for Linux based containers. + // Linux is platform-specific configuration for Linux based containers. Linux *Linux `json:"linux,omitempty" platform:"linux"` - // Solaris is platform specific configuration for Solaris containers. + // Solaris is platform-specific configuration for Solaris based containers. Solaris *Solaris `json:"solaris,omitempty" platform:"solaris"` - // Windows is platform specific configuration for Windows based containers, including Hyper-V containers. + // Windows is platform-specific configuration for Windows based containers. 
Windows *Windows `json:"windows,omitempty" platform:"windows"` } @@ -34,7 +32,7 @@ type Process struct { // Terminal creates an interactive terminal for the container. Terminal bool `json:"terminal,omitempty"` // ConsoleSize specifies the size of the console. - ConsoleSize Box `json:"consoleSize,omitempty"` + ConsoleSize *Box `json:"consoleSize,omitempty"` // User specifies user information for the process. User User `json:"user"` // Args specifies the binary and arguments for the application to execute. @@ -44,18 +42,35 @@ type Process struct { // Cwd is the current working directory for the process and must be // relative to the container's root. Cwd string `json:"cwd"` - // Capabilities are Linux capabilities that are kept for the container. - Capabilities []string `json:"capabilities,omitempty" platform:"linux"` + // Capabilities are Linux capabilities that are kept for the process. + Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"` // Rlimits specifies rlimit options to apply to the process. Rlimits []LinuxRlimit `json:"rlimits,omitempty" platform:"linux"` // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"` // ApparmorProfile specifies the apparmor profile for the container. ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"` + // Specify an oom_score_adj for the container. + OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"` // SelinuxLabel specifies the selinux context that the container process is run as. SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` } +// LinuxCapabilities specifies the whitelist of capabilities that are kept for a process. +// http://man7.org/linux/man-pages/man7/capabilities.7.html +type LinuxCapabilities struct { + // Bounding is the set of capabilities checked by the kernel. 
+ Bounding []string `json:"bounding,omitempty" platform:"linux"` + // Effective is the set of capabilities checked by the kernel. + Effective []string `json:"effective,omitempty" platform:"linux"` + // Inheritable is the capabilities preserved across execve. + Inheritable []string `json:"inheritable,omitempty" platform:"linux"` + // Permitted is the limiting superset for effective capabilities. + Permitted []string `json:"permitted,omitempty" platform:"linux"` + // Ambient is the ambient set of capabilities that are kept. + Ambient []string `json:"ambient,omitempty" platform:"linux"` +} + // Box specifies dimensions of a rectangle. Used for specifying the size of a console. type Box struct { // Height is the vertical dimension of a box. @@ -79,29 +94,19 @@ type User struct { // Root contains information about the container's root filesystem on the host. type Root struct { // Path is the absolute path to the container's root filesystem. - Path string `json:"path"` + Path string `json:"path,omitempty"` // Readonly makes the root filesystem for the container readonly before the process is executed. Readonly bool `json:"readonly,omitempty"` } -// Platform specifies OS and arch information for the host system that the container -// is created for. -type Platform struct { - // OS is the operating system. - OS string `json:"os"` - // Arch is the architecture - Arch string `json:"arch"` -} - // Mount specifies a mount for a container. type Mount struct { - // Destination is the path where the mount will be placed relative to the container's root. The path and child directories MUST exist, a runtime MUST NOT create directories automatically to a mount point. + // Destination is the absolute path where the mount will be placed in the container. Destination string `json:"destination"` // Type specifies the mount kind. - Type string `json:"type"` - // Source specifies the source path of the mount. 
In the case of bind mounts on - // Linux based systems this would be the file on the host. - Source string `json:"source"` + Type string `json:"type,omitempty" platform:"linux,solaris"` + // Source specifies the source path of the mount. + Source string `json:"source,omitempty"` // Options are fstab style mount options. Options []string `json:"options,omitempty"` } @@ -117,7 +122,6 @@ type Hook struct { // Hooks for container setup and teardown type Hooks struct { // Prestart is a list of hooks to be run before the container process is executed. - // On Linux, they are run after the container namespaces are created. Prestart []Hook `json:"prestart,omitempty"` // Poststart is a list of hooks to be run after the container process is started. Poststart []Hook `json:"poststart,omitempty"` @@ -125,11 +129,11 @@ type Hooks struct { Poststop []Hook `json:"poststop,omitempty"` } -// Linux contains platform specific configuration for Linux based containers. +// Linux contains platform-specific configuration for Linux based containers. type Linux struct { - // UIDMapping specifies user mappings for supporting user namespaces on Linux. + // UIDMapping specifies user mappings for supporting user namespaces. UIDMappings []LinuxIDMapping `json:"uidMappings,omitempty"` - // GIDMapping specifies group mappings for supporting user namespaces on Linux. + // GIDMapping specifies group mappings for supporting user namespaces. GIDMappings []LinuxIDMapping `json:"gidMappings,omitempty"` // Sysctl are a set of key value pairs that are set for the container on start Sysctl map[string]string `json:"sysctl,omitempty"` @@ -154,11 +158,14 @@ type Linux struct { ReadonlyPaths []string `json:"readonlyPaths,omitempty"` // MountLabel specifies the selinux context for the mounts in the container. 
MountLabel string `json:"mountLabel,omitempty"` + // IntelRdt contains Intel Resource Director Technology (RDT) information + // for handling resource constraints (e.g., L3 cache) for the container + IntelRdt *LinuxIntelRdt `json:"intelRdt,omitempty"` } // LinuxNamespace is the configuration for a Linux namespace type LinuxNamespace struct { - // Type is the type of Linux namespace + // Type is the type of namespace Type LinuxNamespaceType `json:"type"` // Path is a path to an existing namespace persisted on disk that can be joined // and is of the same type @@ -210,7 +217,7 @@ type LinuxHugepageLimit struct { // Pagesize is the hugepage size Pagesize string `json:"pageSize"` // Limit is the limit of "hugepagesize" hugetlb usage - Limit int64 `json:"limit"` + Limit uint64 `json:"limit"` } // LinuxInterfacePriority for network interfaces @@ -229,12 +236,12 @@ type linuxBlockIODevice struct { Minor int64 `json:"minor"` } -// LinuxWeightDevice struct holds a `major:minor weight` pair for blkioWeightDevice +// LinuxWeightDevice struct holds a `major:minor weight` pair for weightDevice type LinuxWeightDevice struct { linuxBlockIODevice - // Weight is the bandwidth rate for the device, range is from 10 to 1000 + // Weight is the bandwidth rate for the device. 
Weight *uint16 `json:"weight,omitempty"` - // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only + // LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, CFQ scheduler only LeafWeight *uint16 `json:"leafWeight,omitempty"` } @@ -247,35 +254,35 @@ type LinuxThrottleDevice struct { // LinuxBlockIO for Linux cgroup 'blkio' resource management type LinuxBlockIO struct { - // Specifies per cgroup weight, range is from 10 to 1000 - Weight *uint16 `json:"blkioWeight,omitempty"` - // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, CFQ scheduler only - LeafWeight *uint16 `json:"blkioLeafWeight,omitempty"` + // Specifies per cgroup weight + Weight *uint16 `json:"weight,omitempty"` + // Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, CFQ scheduler only + LeafWeight *uint16 `json:"leafWeight,omitempty"` // Weight per cgroup per device, can override BlkioWeight - WeightDevice []LinuxWeightDevice `json:"blkioWeightDevice,omitempty"` + WeightDevice []LinuxWeightDevice `json:"weightDevice,omitempty"` // IO read rate limit per cgroup per device, bytes per second - ThrottleReadBpsDevice []LinuxThrottleDevice `json:"blkioThrottleReadBpsDevice,omitempty"` + ThrottleReadBpsDevice []LinuxThrottleDevice `json:"throttleReadBpsDevice,omitempty"` // IO write rate limit per cgroup per device, bytes per second - ThrottleWriteBpsDevice []LinuxThrottleDevice `json:"blkioThrottleWriteBpsDevice,omitempty"` + ThrottleWriteBpsDevice []LinuxThrottleDevice `json:"throttleWriteBpsDevice,omitempty"` // IO read rate limit per cgroup per device, IO per second - ThrottleReadIOPSDevice []LinuxThrottleDevice `json:"blkioThrottleReadIOPSDevice,omitempty"` + ThrottleReadIOPSDevice []LinuxThrottleDevice `json:"throttleReadIOPSDevice,omitempty"` // IO write rate 
limit per cgroup per device, IO per second - ThrottleWriteIOPSDevice []LinuxThrottleDevice `json:"blkioThrottleWriteIOPSDevice,omitempty"` + ThrottleWriteIOPSDevice []LinuxThrottleDevice `json:"throttleWriteIOPSDevice,omitempty"` } // LinuxMemory for Linux cgroup 'memory' resource management type LinuxMemory struct { // Memory limit (in bytes). - Limit *int64 `json:"limit,omitempty"` + Limit *uint64 `json:"limit,omitempty"` // Memory reservation or soft_limit (in bytes). - Reservation *int64 `json:"reservation,omitempty"` + Reservation *uint64 `json:"reservation,omitempty"` // Total memory limit (memory + swap). - Swap *int64 `json:"swap,omitempty"` + Swap *uint64 `json:"swap,omitempty"` // Kernel memory limit (in bytes). - Kernel *int64 `json:"kernel,omitempty"` + Kernel *uint64 `json:"kernel,omitempty"` // Kernel memory limit for tcp (in bytes) - KernelTCP *int64 `json:"kernelTCP,omitempty"` - // How aggressive the kernel will swap memory pages. Range from 0 to 100. + KernelTCP *uint64 `json:"kernelTCP,omitempty"` + // How aggressive the kernel will swap memory pages. Swappiness *uint64 `json:"swappiness,omitempty"` } @@ -317,8 +324,6 @@ type LinuxResources struct { Devices []LinuxDeviceCgroup `json:"devices,omitempty"` // DisableOOMKiller disables the OOM killer for out of memory conditions DisableOOMKiller *bool `json:"disableOOMKiller,omitempty"` - // Specify an oom_score_adj for the container. 
- OOMScoreAdj *int `json:"oomScoreAdj,omitempty"` // Memory restriction configuration Memory *LinuxMemory `json:"memory,omitempty"` // CPU resource restriction configuration @@ -365,14 +370,7 @@ type LinuxDeviceCgroup struct { Access string `json:"access,omitempty"` } -// LinuxSeccomp represents syscall restrictions -type LinuxSeccomp struct { - DefaultAction LinuxSeccompAction `json:"defaultAction"` - Architectures []Arch `json:"architectures"` - Syscalls []LinuxSyscall `json:"syscalls,omitempty"` -} - -// Solaris contains platform specific configuration for Solaris application containers. +// Solaris contains platform-specific configuration for Solaris application containers. type Solaris struct { // SMF FMRI which should go "online" before we start the container process. Milestone string `json:"milestone,omitempty"` @@ -419,8 +417,20 @@ type SolarisAnet struct { // Windows defines the runtime configuration for Windows based containers, including Hyper-V containers. type Windows struct { + // LayerFolders contains a list of absolute paths to directories containing image layers. + LayerFolders []string `json:"layerFolders"` // Resources contains information for handling resource constraints for the container. Resources *WindowsResources `json:"resources,omitempty"` + // CredentialSpec contains a JSON object describing a group Managed Service Account (gMSA) specification. + CredentialSpec interface{} `json:"credentialSpec,omitempty"` + // Servicing indicates if the container is being started in a mode to apply a Windows Update servicing operation. + Servicing bool `json:"servicing,omitempty"` + // IgnoreFlushesDuringBoot indicates if the container is being started in a mode where disk writes are not flushed during its boot process. + IgnoreFlushesDuringBoot bool `json:"ignoreFlushesDuringBoot,omitempty"` + // HyperV contains information for running a container with Hyper-V isolation. 
+ HyperV *WindowsHyperV `json:"hyperv,omitempty"` + // Network restriction configuration. + Network *WindowsNetwork `json:"network,omitempty"` } // WindowsResources has container runtime resource constraints for containers running on Windows. @@ -431,26 +441,22 @@ type WindowsResources struct { CPU *WindowsCPUResources `json:"cpu,omitempty"` // Storage restriction configuration. Storage *WindowsStorageResources `json:"storage,omitempty"` - // Network restriction configuration. - Network *WindowsNetworkResources `json:"network,omitempty"` } // WindowsMemoryResources contains memory resource management settings. type WindowsMemoryResources struct { // Memory limit in bytes. Limit *uint64 `json:"limit,omitempty"` - // Memory reservation in bytes. - Reservation *uint64 `json:"reservation,omitempty"` } // WindowsCPUResources contains CPU resource management settings. type WindowsCPUResources struct { // Number of CPUs available to the container. Count *uint64 `json:"count,omitempty"` - // CPU shares (relative weight to other containers with cpu shares). Range is from 1 to 10000. + // CPU shares (relative weight to other containers with cpu shares). Shares *uint16 `json:"shares,omitempty"` - // Percent of available CPUs usable by the container. - Percent *uint8 `json:"percent,omitempty"` + // Specifies the portion of processor cycles that this container can use as a percentage times 100. + Maximum *uint16 `json:"maximum,omitempty"` } // WindowsStorageResources contains storage resource management settings. @@ -463,10 +469,29 @@ type WindowsStorageResources struct { SandboxSize *uint64 `json:"sandboxSize,omitempty"` } -// WindowsNetworkResources contains network resource management settings. -type WindowsNetworkResources struct { - // EgressBandwidth is the maximum egress bandwidth in bytes per second. - EgressBandwidth *uint64 `json:"egressBandwidth,omitempty"` +// WindowsNetwork contains network settings for Windows containers. 
+type WindowsNetwork struct { + // List of HNS endpoints that the container should connect to. + EndpointList []string `json:"endpointList,omitempty"` + // Specifies if unqualified DNS name resolution is allowed. + AllowUnqualifiedDNSQuery bool `json:"allowUnqualifiedDNSQuery,omitempty"` + // List of DNS suffixes to use for name resolution. + DNSSearchList []string `json:"DNSSearchList,omitempty"` + // Name (ID) of the container that we will share the network stack with. + NetworkSharedContainerName string `json:"networkSharedContainerName,omitempty"` +} + +// WindowsHyperV contains information for configuring a container to run with Hyper-V isolation. +type WindowsHyperV struct { + // UtilityVMPath is an optional path to the image used for the Utility VM. + UtilityVMPath string `json:"utilityVMPath,omitempty"` +} + +// LinuxSeccomp represents syscall restrictions +type LinuxSeccomp struct { + DefaultAction LinuxSeccompAction `json:"defaultAction"` + Architectures []Arch `json:"architectures,omitempty"` + Syscalls []LinuxSyscall `json:"syscalls,omitempty"` } // Arch used for additional architectures @@ -491,6 +516,8 @@ const ( ArchPPC64LE Arch = "SCMP_ARCH_PPC64LE" ArchS390 Arch = "SCMP_ARCH_S390" ArchS390X Arch = "SCMP_ARCH_S390X" + ArchPARISC Arch = "SCMP_ARCH_PARISC" + ArchPARISC64 Arch = "SCMP_ARCH_PARISC64" ) // LinuxSeccompAction taken upon Seccomp rule match @@ -529,7 +556,15 @@ type LinuxSeccompArg struct { // LinuxSyscall is used to match a syscall in Seccomp type LinuxSyscall struct { - Name string `json:"name"` + Names []string `json:"names"` Action LinuxSeccompAction `json:"action"` Args []LinuxSeccompArg `json:"args,omitempty"` } + +// LinuxIntelRdt has container runtime resource constraints +// for Intel RDT/CAT, which were introduced in the Linux 4.10 kernel +type LinuxIntelRdt struct { + // The schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." 
+ L3CacheSchema string `json:"l3CacheSchema,omitempty"` +} diff --git a/specs-go/version.go b/specs-go/version.go index 7b31f0285..dfcf0090e 100644 --- a/specs-go/version.go +++ b/specs-go/version.go @@ -11,7 +11,7 @@ const ( VersionPatch = 0 // VersionDev indicates development branch. Releases will be empty string. - VersionDev = "-rc4-dev" + VersionDev = "-rc5-dev" ) // Version is the specification version that the package types support. diff --git a/style.md b/style.md index bb5bced4f..1d42043e7 100644 --- a/style.md +++ b/style.md @@ -1,29 +1,45 @@ -# Style and conventions +# Style and conventions -## One sentence per line +## One sentence per line To keep consistency throughout the Markdown files in the Open Container spec all files should be formatted one sentence per line. This fixes two things: it makes diffing easier with git and it resolves fights about line wrapping length. For example, this paragraph will span three lines in the Markdown source. -## Traditionally hex settings should use JSON integers, not JSON strings +## Traditionally hex settings should use JSON integers, not JSON strings -For example, [`"classID": 1048577`][class-id] instead of `"classID": "0x100001"`. +For example, [`"classID": 1048577`](config-linux.md#network) instead of `"classID": "0x100001"`. The config JSON isn't enough of a UI to be worth jumping through string <-> integer hoops to support an 0x… form ([source][integer-over-hex]). -## Constant names should keep redundant prefixes +## Constant names should keep redundant prefixes -For example, `CAP_KILL` instead of `KILL` in [**`linux.capabilities`**][capabilities]. +For example, `CAP_KILL` instead of `KILL` in [**`process.capabilities`**](config.md#process). The redundancy reduction from removing the namespacing prefix is not useful enough to be worth trimming the upstream identifier ([source][keep-prefix]). 
-## Optional settings should not have pointer Go types +## Optional settings should not have pointer Go types Because in many cases the Go default for the type is a no-op in the spec (sources [here][no-pointer-for-strings], [here][no-pointer-for-slices], and [here][no-pointer-for-boolean]). The exceptions are entries where we need to distinguish between “not set” and “set to the Go default for that type” ([source][pointer-when-updates-require-changes]), and this decision should be made on a per-setting case. +## Links + +Internal links should be [relative links][markdown-relative-links] when linking to content within the repository. +Internal links should be used inline. + +External links should be collected at the bottom of a markdown file and used as referenced links. +See 'Referenced Links' in this [markdown quick reference][markdown-quick-reference]. +The use of referenced links in the markdown body helps to keep files clean and organized. +This also facilitates updates of external link targets on a per-file basis. + +Referenced links should be kept in two alphabetically sorted sets, a general reference section followed by a man page section. +To keep Pandoc happy, duplicate naming of links within pages listed in the Makefile's `DOC_FILES` variable should be avoided by appending an `_N` to the link tagname, where `N` is some number not currently in use. +The organization and style of an existing reference section should be maintained unless it violates these style guidelines. + +An exception to these rules is when a URL is needed contextually, for example when showing an explicit link to the reader. + ## Examples -### Anchoring +### Anchoring For any given section that provides a notable example, it is ideal to have it denoted with [markdown headers][markdown-headers]. The level of header should be such that it is a subheader of the header it is an example of. @@ -47,7 +63,7 @@ To use Some Topic, ... 
``` -### Content +### Content Where necessary, the values in the example can be empty or unset, but accommodate with comments regarding this intention. @@ -86,8 +102,24 @@ Following is a fully populated example (not necessarily for copy/paste use) } ``` -[capabilities]: config-linux.md#capabilities -[class-id]: config-linux.md#network +### Links + +The following is an example of different types of links. +This is shown as a complete markdown file, where the referenced links are at the bottom. + +```markdown +The specification repository's [glossary](glossary.md) is where readers can find definitions of commonly used terms. + +Readers may click through to the [Open Containers namespace][open-containers] on [GitHub][github]. + +The URL for the Open Containers link above is: https://github.com/opencontainers + + +[github]: https://github.com +[open-containers]: https://github.com/opencontainers +``` + + [integer-over-hex]: https://github.com/opencontainers/runtime-spec/pull/267#r48360013 [keep-prefix]: https://github.com/opencontainers/runtime-spec/pull/159#issuecomment-138728337 [no-pointer-for-boolean]: https://github.com/opencontainers/runtime-spec/pull/290#r50296396 @@ -95,3 +127,5 @@ Following is a fully populated example (not necessarily for copy/paste use) [no-pointer-for-strings]: https://github.com/opencontainers/runtime-spec/pull/653#issue-200439192 [pointer-when-updates-require-changes]: https://github.com/opencontainers/runtime-spec/pull/317#r50932706 [markdown-headers]: https://help.github.com/articles/basic-writing-and-formatting-syntax/#headings +[markdown-quick-reference]: https://en.support.wordpress.com/markdown-quick-reference +[markdown-relative-links]: https://help.github.com/articles/basic-writing-and-formatting-syntax/#relative-links