From d80c8e317ff8709cb6dc85dc6cd9472f153e81dc Mon Sep 17 00:00:00 2001 From: Joao Grassi Date: Tue, 12 Sep 2023 16:54:28 +0200 Subject: [PATCH] BREAKING: Generate System metrics semconv from YAML + move attributes to their own namespace (#89) Co-authored-by: Armin Ruech Co-authored-by: Pablo Baeyens --- CHANGELOG.md | 27 + docs/system/system-metrics.md | 840 ++++++++++++++++++++++++------ model/metrics/system-metrics.yaml | 492 +++++++++++++++++ schema-next.yaml | 64 +++ 4 files changed, 1277 insertions(+), 146 deletions(-) create mode 100644 model/metrics/system-metrics.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e438617ba..4612cb77e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,6 +71,33 @@ release. ([#270](https://github.com/open-telemetry/semantic-conventions/pull/270)) - Moved RPC streaming notes from metric brief section to notes section. ([#275](https://github.com/open-telemetry/semantic-conventions/pull/275)) +- BREAKING: Generate System metrics semconv from YAML. + ([#89](https://github.com/open-telemetry/semantic-conventions/pull/89)) + - Rename attributes for `system.cpu.*` metrics: + - `state` to `system.cpu.state` + - `cpu` to `system.cpu.logical_number` + - Rename attributes for `system.memory.*` metrics: + - `state` to `system.memory.state` + - Rename attributes for `system.paging.*` metrics: + - `state` to `system.paging.state` + - `type` to `system.paging.type` + - `direction` to `system.paging.direction` + - Rename attributes for `system.disk.*` metrics: + - `device` to `system.device` + - `direction` to `system.disk.direction` + - Rename attributes for `system.filesystem.*` metrics: + - `device` to `system.device` + - `state` to `system.filesystem.state` + - `type` to `system.filesystem.type` + - `mode` to `system.filesystem.mode` + - `mountpoint` to `system.filesystem.mountpoint` + - Rename attributes for `system.network.*` metrics: + - `device` to `system.device` + - `direction` to `system.network.direction` + - `protocol` to `network.protocol` + - `state` to `system.network.state` + - Rename attributes for `system.processes.*` metrics: + - `status` to `system.processes.status` ## v1.21.0 (2023-07-13) diff --git a/docs/system/system-metrics.md b/docs/system/system-metrics.md index 2f71ef86af..f17b799050 100644 --- a/docs/system/system-metrics.md +++ b/docs/system/system-metrics.md @@ -15,154 +15,701 @@ instruments not explicitly defined in the specification. -- [Metric Instruments](#metric-instruments) - * [`system.cpu.` - Processor metrics](#systemcpu---processor-metrics) - * [`system.memory.` - Memory metrics](#systemmemory---memory-metrics) - * [`system.paging.` - Paging/swap metrics](#systempaging---pagingswap-metrics) - * [`system.disk.` - Disk controller metrics](#systemdisk---disk-controller-metrics) - * [`system.filesystem.` - Filesystem metrics](#systemfilesystem---filesystem-metrics) - * [`system.network.` - Network metrics](#systemnetwork---network-metrics) - * [`system.processes.` - Aggregate system process metrics](#systemprocesses---aggregate-system-process-metrics) - * [`system.{os}.` - OS Specific System Metrics](#systemos---os-specific-system-metrics) +- [Processor Metrics](#processor-metrics) + * [Metric: `system.cpu.time`](#metric-systemcputime) + * [Metric: `system.cpu.utilization`](#metric-systemcpuutilization) + * [Metric: `system.cpu.physical.count`](#metric-systemcpuphysicalcount) + * [Metric: `system.cpu.logical.count`](#metric-systemcpulogicalcount) +- [Memory Metrics](#memory-metrics) + * [Metric: `system.memory.usage`](#metric-systemmemoryusage) + * [Metric: `system.memory.utilization`](#metric-systemmemoryutilization) +- [Paging/Swap Metrics](#pagingswap-metrics) + * [Metric: `system.paging.usage`](#metric-systempagingusage) + * [Metric: `system.paging.utilization`](#metric-systempagingutilization) + * [Metric: `system.paging.faults`](#metric-systempagingfaults) + * [Metric: `system.paging.operations`](#metric-systempagingoperations) +- [Disk Controller Metrics](#disk-controller-metrics) + * [Metric: `system.disk.io`](#metric-systemdiskio) + * [Metric: `system.disk.operations`](#metric-systemdiskoperations) + * [Metric: `system.disk.io_time`](#metric-systemdiskio_time) + * [Metric: `system.disk.operation_time`](#metric-systemdiskoperation_time) + * [Metric: `system.disk.merged`](#metric-systemdiskmerged) +- [Filesystem Metrics](#filesystem-metrics) + * [Metric: `system.filesystem.usage`](#metric-systemfilesystemusage) + * [Metric: `system.filesystem.utilization`](#metric-systemfilesystemutilization) +- [Network Metrics](#network-metrics) + * [Metric: `system.network.dropped`](#metric-systemnetworkdropped) + * [Metric: `system.network.packets`](#metric-systemnetworkpackets) + * [Metric: `system.network.errors`](#metric-systemnetworkerrors) + * [Metric: `system.network.io`](#metric-systemnetworkio) + * [Metric: `system.network.connections`](#metric-systemnetworkconnections) +- [Aggregate System Process Metrics](#aggregate-system-process-metrics) + * [Metric: `system.processes.count`](#metric-systemprocessescount) + * [Metric: `system.processes.created`](#metric-systemprocessescreated) +- [`system.{os}.` - OS Specific System Metrics](#systemos---os-specific-system-metrics) -## Metric Instruments - -### `system.cpu.` - Processor metrics - -**Description:** System level processor metrics. - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics.md#instrument-types)) | Value Type | Attribute Key(s) | Attribute Values | -| ------------------------- | ---------------------------------------------------------------------------------------------------------------- | ----- | ------------------------------------------------- | ---------- | ---------------- | ----------------------------------- | -| system.cpu.time | Seconds each logical CPU spent on each mode | s | Counter | Double | state | idle, user, system, interrupt, etc. | -| | | | | | cpu | Logical CPU number [0..n-1] | -| system.cpu.utilization | Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of logical CPUs | 1 | Gauge | Double | state | idle, user, system, interrupt, etc. | -| | | | | | cpu | Logical CPU number (0..n) | -| system.cpu.physical.count | Reports the number of actual physical processor cores on the hardware | {cpu} | UpDownCounter | Int64 | | | -| system.cpu.logical.count | Reports the number of logical (virtual) processor cores created by the operating system to manage multitasking | {cpu} | UpDownCounter | Int64 | | | - -### `system.memory.` - Memory metrics - -**Description:** System level memory metrics. This does not include [paging/swap -memory](#systempaging---pagingswap-metrics). - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -| ------------------------- | ----------- | ----- | ------------------------------------------------- | ---------- | ------------- | ------------------------ | -| system.memory.usage | | By | UpDownCounter | Int64 | state | used, free, cached, etc. | -| system.memory.utilization | | 1 | Gauge | Double | state | used, free, cached, etc. | - -### `system.paging.` - Paging/swap metrics - -**Description:** System level paging/swap memory metrics. - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -|---------------------------|-------------------------------------|--------------|---------------------------------------------------|------------|---------------|------------------| -| system.paging.usage | Unix swap or windows pagefile usage | By | UpDownCounter | Int64 | state | used, free | -| system.paging.utilization | | 1 | Gauge | Double | state | used, free | -| system.paging.faults | | {fault} | Counter | Int64 | type | major, minor | -| system.paging.operations | | {operation} | Counter | Int64 | type | major, minor | -| | | | | | direction | in, out | - -### `system.disk.` - Disk controller metrics - -**Description:** System level disk performance metrics. - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -|--------------------------------------------|-------------------------------------------------|--------------|---------------------------------------------------|------------|---------------|------------------| -| system.disk.io | | By | Counter | Int64 | device | (identifier) | -| | | | | | direction | read, write | -| system.disk.operations | | {operation} | Counter | Int64 | device | (identifier) | -| | | | | | direction | read, write | -| system.disk.io_time\[1\] | Time disk spent activated | s | Counter | Double | device | (identifier) | -| system.disk.operation_time\[2\] | Sum of the time each operation took to complete | s | Counter | Double | device | (identifier) | -| | | | | | direction | read, write | -| system.disk.merged | | {operation} | Counter | Int64 | device | (identifier) | -| | | | | | direction | read, write | - -1 The real elapsed time ("wall clock") -used in the I/O path (time from operations running in parallel are not -counted). Measured as: - -- Linux: Field 13 from -[procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) -- Windows: The complement of ["Disk\% Idle -Time"](https://docs.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained:~:text=%25%20Idle%20Time,Idle\)%20to%200%20(meaning%20always%20busy).) -performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` - -2 Because it is the sum of time each -request took, parallel-issued requests each contribute to make the count -grow. Measured as: - -- Linux: Fields 7 & 11 from -[procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) -- Windows: "Avg. Disk sec/Read" perf counter multiplied by "Disk Reads/sec" -perf counter (similar for Writes) - -### `system.filesystem.` - Filesystem metrics - -**Description:** System level filesystem metrics. - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -| ----------------------------- | ----------- | ----- | ------------------------------------------------- | ---------- | ------------- | -------------------- | -| system.filesystem.usage | | By | UpDownCounter | Int64 | device | (identifier) | -| | | | | | state | used, free, reserved | -| | | | | | type | ext4, tmpfs, etc. | -| | | | | | mode | rw, ro, etc. | -| | | | | | mountpoint | (path) | -| system.filesystem.utilization | | 1 | Gauge | Double | device | (identifier) | -| | | | | | state | used, free, reserved | -| | | | | | type | ext4, tmpfs, etc. | -| | | | | | mode | rw, ro, etc. | -| | | | | | mountpoint | (path) | - -### `system.network.` - Network metrics - -**Description:** System level network metrics. - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -|----------------------------------------|-------------------------------------------------------------------------------|---------------|---------------------------------------------------|------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| system.network.dropped\[1\] | Count of packets that are dropped or discarded even though there was no error | {packet} | Counter | Int64 | device | (identifier) | -| | | | | | direction | transmit, receive | -| system.network.packets | | {packet} | Counter | Int64 | device | (identifier) | -| | | | | | direction | transmit, receive | -| system.network.errors\[2\] | Count of network errors detected | {error} | Counter | Int64 | device | (identifier) | -| | | | | | direction | transmit, receive | -| system.network.io | | By | Counter | Int64 | device | (identifier) | -| | | | | | direction | transmit, receive | -| system.network.connections | | {connection} | UpDownCounter | Int64 | device | (identifier) | -| | | | | | protocol | tcp, udp, [etc.](https://en.wikipedia.org/wiki/Transport_layer#Protocols) | -| | | | | | state | If specified, SHOULD be one of: close, close_wait, closing, delete, established, fin_wait_1, fin_wait_2, last_ack, listen, syn_recv, syn_sent, time_wait. A stateless protocol MUST NOT set this attribute. | - -1 Measured as: - -- Linux: the `drop` column in `/proc/dev/net` -([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)). -- Windows: -[`InDiscards`/`OutDiscards`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) -from -[`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2). - -2 Measured as: - -- Linux: the `errs` column in `/proc/dev/net` -([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)). -- Windows: -[`InErrors`/`OutErrors`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) -from -[`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2). - -### `system.processes.` - Aggregate system process metrics - -**Description:** System level aggregate process metrics. For metrics at the -individual process level, see [process metrics](process-metrics.md). - -| Name | Description | Units | Instrument Type ([*](/docs/general/metrics.md#instrument-types)) | Value Type | Attribute Key | Attribute Values | -| ------------------------ | --------------------------------------------------------- | ----------- | ------------------------------------------------- | ---------- | ------------- | ---------------------------------------------------------------------------------------------- | -| system.processes.count | Total number of processes in each state | {process} | UpDownCounter | Int64 | status | running, sleeping, [etc.](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | -| system.processes.created | Total number of processes created over uptime of the host | {process} | Counter | Int64 | - | - | - -### `system.{os}.` - OS Specific System Metrics +## Processor Metrics + +**Description:** System level processor metrics captured under the namespace `system.cpu`. + +### Metric: `system.cpu.time` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.cpu.time` | Counter | `s` | Seconds each logical CPU spent on each mode | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.cpu.logical_number` | int | The logical CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | + +`system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `user` | user | +| `system` | system | +| `nice` | nice | +| `idle` | idle | +| `iowait` | iowait | +| `interrupt` | interrupt | +| `steal` | steal | + + +### Metric: `system.cpu.utilization` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.cpu.utilization` | Gauge | `1` | Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of logical CPUs | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.cpu.logical_number` | int | The logical CPU number [0..n-1] | `1` | Recommended | +| `system.cpu.state` | string | The state of the CPU | `idle`; `interrupt` | Recommended | + +`system.cpu.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `user` | user | +| `system` | system | +| `nice` | nice | +| `idle` | idle | +| `iowait` | iowait | +| `interrupt` | interrupt | +| `steal` | steal | + + +### Metric: `system.cpu.physical.count` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.cpu.physical.count` | UpDownCounter | `{cpu}` | Reports the number of actual physical processor cores on the hardware | + + + + + +### Metric: `system.cpu.logical.count` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.cpu.logical.count` | UpDownCounter | `{cpu}` | Reports the number of logical (virtual) processor cores created by the operating system to manage multitasking | + + + + + +## Memory Metrics + +**Description:** System level memory metrics capture under the namespace `system.memory`. +This does not include [paging/swap memory](#pagingswap-metrics). + +### Metric: `system.memory.usage` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.memory.usage` | UpDownCounter | `By` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.memory.state` | string | The memory state | `free`; `cached` | Recommended | + +`system.memory.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `total` | total | +| `used` | used | +| `free` | free | +| `shared` | shared | +| `buffers` | buffers | +| `cached` | cached | + + +### Metric: `system.memory.utilization` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.memory.utilization` | Gauge | `1` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.memory.state` | string | The memory state | `free`; `cached` | Recommended | + +`system.memory.state` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `total` | total | +| `used` | used | +| `free` | free | +| `shared` | shared | +| `buffers` | buffers | +| `cached` | cached | + + +## Paging/Swap Metrics + +**Description:** System level paging/swap memory metrics captured under the namespace `system.paging`. + +### Metric: `system.paging.usage` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.paging.usage` | UpDownCounter | `By` | Unix swap or windows pagefile usage | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.paging.state` | string | The memory paging state | `free` | Recommended | + +`system.paging.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | + + +### Metric: `system.paging.utilization` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.paging.utilization` | Gauge | `1` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.paging.state` | string | The memory paging state | `free` | Recommended | + +`system.paging.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | + + +### Metric: `system.paging.faults` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.paging.faults` | Counter | `{fault}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.paging.type` | string | The memory paging type | `minor` | Recommended | + +`system.paging.type` MUST be one of the following: + +| Value | Description | +|---|---| +| `major` | major | +| `minor` | minor | + + +### Metric: `system.paging.operations` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.paging.operations` | Counter | `{operation}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.paging.direction` | string | The paging access direction | `in` | Recommended | +| `system.paging.type` | string | The memory paging type | `minor` | Recommended | + +`system.paging.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `in` | in | +| `out` | out | + +`system.paging.type` MUST be one of the following: + +| Value | Description | +|---|---| +| `major` | major | +| `minor` | minor | + + +## Disk Controller Metrics + +**Description:** System level disk performance metrics captured under the namespace `system.disk`. + +### Metric: `system.disk.io` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.io` | Counter | `By` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.disk.direction` | string | The disk operation direction | `read` | Recommended | + +`system.disk.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `read` | read | +| `write` | write | + + +### Metric: `system.disk.operations` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.operations` | Counter | `{operation}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.disk.direction` | string | The disk operation direction | `read` | Recommended | + +`system.disk.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `read` | read | +| `write` | write | + + +### Metric: `system.disk.io_time` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.io_time` | Counter | `s` | Time disk spent activated [1] | + +**[1]:** The real elapsed time ("wall clock") used in the I/O path (time from operations running in parallel are not counted). Measured as: + +- Linux: Field 13 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) +- Windows: The complement of + ["Disk\% Idle Time"](https://learn.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained) + performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | + + +### Metric: `system.disk.operation_time` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.operation_time` | Counter | `s` | Sum of the time each operation took to complete [1] | + +**[1]:** Because it is the sum of time each request took, parallel-issued requests each contribute to make the count grow. Measured as: + +- Linux: Fields 7 & 11 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) +- Windows: "Avg. Disk sec/Read" perf counter multiplied by "Disk Reads/sec" perf counter (similar for Writes) + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.disk.direction` | string | The disk operation direction | `read` | Recommended | + +`system.disk.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `read` | read | +| `write` | write | + + +### Metric: `system.disk.merged` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.disk.merged` | Counter | `{operation}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.disk.direction` | string | The disk operation direction | `read` | Recommended | + +`system.disk.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `read` | read | +| `write` | write | + + +## Filesystem Metrics + +**Description:** System level filesystem metrics captured under the namespace `system.filesystem`. + +### Metric: `system.filesystem.usage` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.filesystem.usage` | UpDownCounter | `By` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.filesystem.mode` | string | The filesystem mode | `rw, ro` | Recommended | +| `system.filesystem.mountpoint` | string | The filesystem mount path | `/mnt/data` | Recommended | +| `system.filesystem.state` | string | The filesystem state | `used` | Recommended | +| `system.filesystem.type` | string | The filesystem type | `ext4` | Recommended | + +`system.filesystem.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | +| `reserved` | reserved | + +`system.filesystem.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `fat32` | fat32 | +| `exfat` | exfat | +| `ntfs` | ntfs | +| `refs` | refs | +| `hfsplus` | hfsplus | +| `ext4` | ext4 | + + +### Metric: `system.filesystem.utilization` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.filesystem.utilization` | Gauge | `1` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.filesystem.mode` | string | The filesystem mode | `rw, ro` | Recommended | +| `system.filesystem.mountpoint` | string | The filesystem mount path | `/mnt/data` | Recommended | +| `system.filesystem.state` | string | The filesystem state | `used` | Recommended | +| `system.filesystem.type` | string | The filesystem type | `ext4` | Recommended | + +`system.filesystem.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `used` | used | +| `free` | free | +| `reserved` | reserved | + +`system.filesystem.type` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `fat32` | fat32 | +| `exfat` | exfat | +| `ntfs` | ntfs | +| `refs` | refs | +| `hfsplus` | hfsplus | +| `ext4` | ext4 | + + +## Network Metrics + +**Description:** System level network metrics captured under the namespace `system.network`. + +### Metric: `system.network.dropped` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.dropped` | Counter | `{packet}` | Count of packets that are dropped or discarded even though there was no error [1] | + +**[1]:** Measured as: + +- Linux: the `drop` column in `/proc/dev/net` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)) +- Windows: [`InDiscards`/`OutDiscards`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) + from [`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2) + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.direction` | string | | `transmit` | Recommended | + +`system.network.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `transmit` | transmit | +| `receive` | receive | + + +### Metric: `system.network.packets` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.packets` | Counter | `{packet}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.direction` | string | | `transmit` | Recommended | + +`system.network.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `transmit` | transmit | +| `receive` | receive | + + +### Metric: `system.network.errors` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.errors` | Counter | `{error}` | Count of network errors detected [1] | + +**[1]:** Measured as: + +- Linux: the `errs` column in `/proc/dev/net` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)). +- Windows: [`InErrors`/`OutErrors`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) + from [`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2). + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.direction` | string | | `transmit` | Recommended | + +`system.network.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `transmit` | transmit | +| `receive` | receive | + + +### Metric: `system.network.io` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.io` | Counter | `By` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.direction` | string | | `transmit` | Recommended | + +`system.network.direction` MUST be one of the following: + +| Value | Description | +|---|---| +| `transmit` | transmit | +| `receive` | receive | + + +### Metric: `system.network.connections` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.network.connections` | UpDownCounter | `{connection}` | | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| [`network.transport`](../general/attributes.md) | string | [OSI Transport Layer](https://osi-model.com/transport-layer/) or [Inter-process Communication method](https://en.wikipedia.org/wiki/Inter-process_communication). The value SHOULD be normalized to lowercase. | `tcp`; `udp` | Recommended | +| `system.device` | string | The device identifier | `(identifier)` | Recommended | +| `system.network.state` | string | A stateless protocol MUST NOT set this attribute | `close_wait` | Recommended | + +`network.transport` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `tcp` | TCP | +| `udp` | UDP | +| `pipe` | Named or anonymous pipe. See note below. | +| `unix` | Unix domain socket | + +`system.network.state` MUST be one of the following: + +| Value | Description | +|---|---| +| `close` | close | +| `close_wait` | close_wait | +| `closing` | closing | +| `delete` | delete | +| `established` | established | +| `fin_wait_1` | fin_wait_1 | +| `fin_wait_2` | fin_wait_2 | +| `last_ack` | last_ack | +| `listen` | listen | +| `syn_recv` | syn_recv | +| `syn_sent` | syn_sent | +| `time_wait` | time_wait | + +## Aggregate System Process Metrics + +**Description:** System level aggregate process metrics captured under the namespace `system.process`. +For metrics at the individual process level, see [process metrics](process-metrics.md). + +### Metric: `system.processes.count` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.processes.count` | UpDownCounter | `{process}` | Total number of processes in each state | + + + +| Attribute | Type | Description | Examples | Requirement Level | +|---|---|---|---|---| +| `system.processes.status` | string | The process state, e.g., [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) | `running` | Recommended | + +`system.processes.status` has the following list of well-known values. If one of them applies, then the respective value MUST be used, otherwise a custom value MAY be used. + +| Value | Description | +|---|---| +| `running` | running | +| `sleeping` | sleeping | +| `stopped` | stopped | +| `defunct` | defunct | + + +### Metric: `system.processes.created` + +This metric is [recommended][MetricRecommended]. + + +| Name | Instrument Type | Unit (UCUM) | Description | +| -------- | --------------- | ----------- | -------------- | +| `system.processes.created` | Counter | `{process}` | Total number of processes created over uptime of the host | + + + + + +## `system.{os}.` - OS Specific System Metrics Instrument names for system level metrics that have different and conflicting meaning across multiple OSes should be prefixed with `system.{os}.` and @@ -190,4 +737,5 @@ An instrument for load average over 1 minute on Linux could be named `system.linux.cpu.load_1m`, reusing the `cpu` name proposed above and having an `{os}` prefix to split this metric across OSes. -[DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.22.0/specification/document-status.md +[DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.22.0/specification/document-status.md +[MetricRecommended]: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.22.0/specification/metrics/metric-requirement-level.md#recommended diff --git a/model/metrics/system-metrics.yaml b/model/metrics/system-metrics.yaml new file mode 100644 index 0000000000..41c508f3d2 --- /dev/null +++ b/model/metrics/system-metrics.yaml @@ -0,0 +1,492 @@ +groups: + # General system attributes + - id: attributes.system + prefix: system + type: attribute_group + brief: "Describes System metric attributes" + attributes: + - id: device + type: string + brief: "The device identifier" + examples: ["(identifier)"] + + # system.cpu.* metrics and attribute group + - id: attributes.system.cpu + prefix: system.cpu + type: attribute_group + brief: "Describes System CPU metric attributes" + attributes: + - id: state + type: + allow_custom_values: true + members: + - id: user + value: 'user' + - id: system + value: 'system' + - id: nice + value: 'nice' + - id: idle + value: 'idle' + - id: iowait + value: 'iowait' + - id: interrupt + value: 'interrupt' + - id: steal + value: 'steal' + brief: "The state of the CPU" + examples: ["idle", "interrupt"] + - id: logical_number + type: int + brief: "The logical CPU number [0..n-1]" + examples: [1] + + - id: metric.system.cpu.time + type: metric + metric_name: system.cpu.time + brief: "Seconds each logical CPU spent on each mode" + instrument: counter + unit: "s" + attributes: + - ref: system.cpu.state + - ref: system.cpu.logical_number + + - id: metric.system.cpu.utilization + type: metric + metric_name: system.cpu.utilization + brief: "Difference in system.cpu.time since the last measurement, divided by the elapsed time and number of logical CPUs" + instrument: gauge + unit: "1" + attributes: + - ref: system.cpu.state + - ref: system.cpu.logical_number + + - id: metric.system.cpu.physical.count + type: metric + metric_name: system.cpu.physical.count + brief: "Reports the number of actual physical processor cores on the hardware" + instrument: updowncounter + unit: "{cpu}" + attributes: [] + + - id: metric.system.cpu.logical.count + type: metric + metric_name: system.cpu.logical.count + brief: "Reports the number of logical (virtual) processor cores created by the operating system to manage multitasking" + instrument: updowncounter + unit: "{cpu}" + attributes: [] + + # sytem.memory.* metrics and attribute group + - id: attributes.system.memory + prefix: system.memory + type: attribute_group + brief: "Describes System Memory metric attributes" + attributes: + - id: state + type: + allow_custom_values: true + members: + - id: total + value: 'total' + - id: used + value: 'used' + - id: free + value: 'free' + - id: shared + value: 'shared' + - id: buffers + value: 'buffers' + - id: cached + value: 'cached' + brief: "The memory state" + examples: ["free", "cached"] + + - id: metric.system.memory.usage + type: metric + metric_name: system.memory.usage + brief: "" + instrument: updowncounter + unit: "By" + attributes: + - ref: system.memory.state + + - id: metric.system.memory.utilization + type: metric + metric_name: system.memory.utilization + brief: "" + instrument: gauge + unit: "1" + attributes: + - ref: system.memory.state + + # system.paging.* metrics and attribute group + - id: attributes.system.paging + prefix: system.paging + type: attribute_group + brief: "Describes System Memory Paging metric attributes" + attributes: + - id: state + type: + allow_custom_values: false + members: + - id: used + value: 'used' + - id: free + value: 'free' + brief: "The memory paging state" + examples: ["free"] + - id: type + type: + allow_custom_values: false + members: + - id: major + value: 'major' + - id: minor + value: 'minor' + brief: "The memory paging type" + examples: ["minor"] + - id: direction + type: + allow_custom_values: false + members: + - id: in + value: 'in' + - id: out + value: 'out' + brief: "The paging access direction" + examples: ["in"] + - id: metric.system.paging.usage + type: metric + metric_name: system.paging.usage + brief: "Unix swap or windows pagefile usage" + instrument: updowncounter + unit: "By" + attributes: + - ref: system.paging.state + + - id: metric.system.paging.utilization + type: metric + metric_name: system.paging.utilization + brief: "" + instrument: gauge + unit: "1" + attributes: + - ref: system.paging.state + + - id: metric.system.paging.faults + type: metric + metric_name: system.paging.faults + brief: "" + instrument: counter + unit: "{fault}" + attributes: + - ref: system.paging.type + + - id: metric.system.paging.operations + type: metric + metric_name: system.paging.operations + brief: "" + instrument: counter + unit: "{operation}" + attributes: + - ref: system.paging.type + - ref: system.paging.direction + + # system.disk.* metrics and attribute group + - id: attributes.system.disk + prefix: system.disk + type: attribute_group + brief: "Describes System Disk metric attributes" + attributes: + - id: direction + type: + allow_custom_values: false + members: + - id: read + value: 'read' + - id: write + value: 'write' + brief: "The disk operation direction" + examples: ["read"] + + - id: metric.system.disk.io + type: metric + metric_name: system.disk.io + brief: "" + instrument: counter + unit: "By" + attributes: + - ref: system.device + - ref: system.disk.direction + + - id: metric.system.disk.operations + type: metric + metric_name: system.disk.operations + brief: "" + instrument: counter + unit: "{operation}" + attributes: + - ref: system.device + - ref: system.disk.direction + + - id: metric.system.disk.io_time + type: metric + metric_name: system.disk.io_time + brief: "Time disk spent activated" + instrument: counter + unit: "s" + note: | + The real elapsed time ("wall clock") used in the I/O path (time from operations running in parallel are not counted). Measured as: + + - Linux: Field 13 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) + - Windows: The complement of + ["Disk\% Idle Time"](https://learn.microsoft.com/en-us/archive/blogs/askcore/windows-performance-monitor-disk-counters-explained#windows-performance-monitor-disk-counters-explained) + performance counter: `uptime * (100 - "Disk\% Idle Time") / 100` + attributes: + - ref: system.device + + - id: metric.system.disk.operation_time + type: metric + metric_name: system.disk.operation_time + brief: "Sum of the time each operation took to complete" + instrument: counter + unit: "s" + note: | + Because it is the sum of time each request took, parallel-issued requests each contribute to make the count grow. Measured as: + + - Linux: Fields 7 & 11 from [procfs-diskstats](https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats) + - Windows: "Avg. Disk sec/Read" perf counter multiplied by "Disk Reads/sec" perf counter (similar for Writes) + attributes: + - ref: system.device + - ref: system.disk.direction + + - id: metric.system.disk.merged + type: metric + metric_name: system.disk.merged + brief: "" + instrument: counter + unit: "{operation}" + attributes: + - ref: system.device + - ref: system.disk.direction + + # system.filesystem.* metrics and attribute group + - id: attributes.system.filesystem + prefix: system.filesystem + type: attribute_group + brief: "Describes Filesystem metric attributes" + attributes: + - id: state + brief: "The filesystem state" + type: + allow_custom_values: false + members: + - id: used + value: 'used' + - id: free + value: 'free' + - id: reserved + value: 'reserved' + examples: ["used"] + - id: type + type: + allow_custom_values: true + members: + - id: fat32 + value: 'fat32' + - id: exfat + value: 'exfat' + - id: ntfs + value: 'ntfs' + - id: refs + value: 'refs' + - id: hfsplus + value: 'hfsplus' + - id: ext4 + value: 'ext4' + brief: "The filesystem type" + examples: ["ext4"] + - id: mode + type: string + brief: "The filesystem mode" + examples: ["rw, ro"] + - id: mountpoint + type: string + brief: "The filesystem mount path" + examples: ["/mnt/data"] + + - id: metric.system.filesystem.usage + type: metric + metric_name: system.filesystem.usage + brief: "" + instrument: updowncounter + unit: "By" + attributes: + - ref: system.device + - ref: system.filesystem.state + - ref: system.filesystem.type + - ref: system.filesystem.mode + - ref: system.filesystem.mountpoint + + - id: metric.system.filesystem.utilization + type: metric + metric_name: system.filesystem.utilization + brief: "" + instrument: gauge + unit: "1" + attributes: + - ref: system.device + - ref: system.filesystem.state + - ref: system.filesystem.type + - ref: system.filesystem.mode + - ref: system.filesystem.mountpoint + + # system.network.* metrics and attribute group + - id: attributes.system.network + prefix: system.network + type: attribute_group + brief: "Describes Network metric attributes" + attributes: + - id: direction + type: + allow_custom_values: false + members: + - id: transmit + value: 'transmit' + - id: receive + value: 'receive' + brief: "" + examples: ["transmit"] + - id: state + type: + allow_custom_values: false + members: + - id: close + value: 'close' + - id: close_wait + value: 'close_wait' + - id: closing + value: 'closing' + - id: delete + value: 'delete' + - id: established + value: 'established' + - id: fin_wait_1 + value: 'fin_wait_1' + - id: fin_wait_2 + value: 'fin_wait_2' + - id: last_ack + value: 'last_ack' + - id: listen + value: 'listen' + - id: syn_recv + value: 'syn_recv' + - id: syn_sent + value: 'syn_sent' + - id: time_wait + value: 'time_wait' + brief: "A stateless protocol MUST NOT set this attribute" + examples: ["close_wait"] + + - id: metric.system.network.dropped + type: metric + metric_name: system.network.dropped + brief: "Count of packets that are dropped or discarded even though there was no error" + instrument: counter + unit: "{packet}" + note: | + Measured as: + + - Linux: the `drop` column in `/proc/dev/net` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)) + - Windows: [`InDiscards`/`OutDiscards`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) + from [`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2) + attributes: + - ref: system.device + - ref: system.network.direction + + - id: metric.system.network.packets + type: metric + metric_name: system.network.packets + brief: "" + instrument: counter + unit: "{packet}" + attributes: + - ref: system.device + - ref: system.network.direction + + - id: metric.system.network.errors + type: metric + metric_name: system.network.errors + brief: "Count of network errors detected" + instrument: counter + unit: "{error}" + note: | + Measured as: + + - Linux: the `errs` column in `/proc/dev/net` ([source](https://web.archive.org/web/20180321091318/http://www.onlamp.com/pub/a/linux/2000/11/16/LinuxAdmin.html)). + - Windows: [`InErrors`/`OutErrors`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2) + from [`GetIfEntry2`](https://docs.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2). + attributes: + - ref: system.device + - ref: system.network.direction + + - id: metric.system.network.io + type: metric + metric_name: system.network.io + brief: "" + instrument: counter + unit: "By" + attributes: + - ref: system.device + - ref: system.network.direction + + - id: metric.system.network.connections + type: metric + metric_name: system.network.connections + brief: "" + instrument: updowncounter + unit: "{connection}" + attributes: + - ref: system.device + - ref: system.network.state + - ref: network.transport + + # system.processes.* metrics and attribute group + - id: attributes.system.processes + prefix: system.processes + type: attribute_group + brief: "Describes System Process metric attributes" + attributes: + - id: status + type: + allow_custom_values: true + members: + - id: running + value: 'running' + - id: sleeping + value: 'sleeping' + - id: stopped + value: 'stopped' + - id: defunct + value: 'defunct' + brief: > + The process state, e.g., [Linux Process State Codes](https://man7.org/linux/man-pages/man1/ps.1.html#PROCESS_STATE_CODES) + examples: ["running"] + + + - id: metric.system.processes.count + type: metric + metric_name: system.processes.count + brief: "Total number of processes in each state" + instrument: updowncounter + unit: "{process}" + attributes: + - ref: system.processes.status + + - id: metric.system.processes.created + type: metric + metric_name: system.processes.created + brief: "Total number of processes created over uptime of the host" + instrument: counter + unit: "{process}" diff --git a/schema-next.yaml b/schema-next.yaml index acc97ca7b8..609605b787 100644 --- a/schema-next.yaml +++ b/schema-next.yaml @@ -63,6 +63,70 @@ versions: - jvm.buffer.usage - jvm.buffer.limit - jvm.buffer.count + # https://github.com/open-telemetry/semantic-conventions/pull/89 + - rename_attributes: + attribute_map: + state: system.cpu.state + cpu: system.cpu.logical_number + apply_to_metrics: + - system.cpu.time + - system.cpu.utilization + - rename_attributes: + attribute_map: + state: system.memory.state + apply_to_metrics: + - system.memory.usage + - system.memory.utilization + - rename_attributes: + attribute_map: + state: system.paging.state + apply_to_metrics: + - system.paging.usage + - system.paging.utilization + - rename_attributes: + attribute_map: + type: system.paging.type + direction: system.paging.direction + apply_to_metrics: + - system.paging.faults + - system.paging.operations + - rename_attributes: + attribute_map: + device: system.device + direction: system.disk.direction + apply_to_metrics: + - system.disk.io + - system.disk.operations + - system.disk.io_time + - system.disk.operation_time + - system.disk.merged + - rename_attributes: + attribute_map: + device: system.device + state: system.filesystem.state + type: system.filesystem.type + mode: system.filesystem.mode + mountpoint: system.filesystem.mountpoint + apply_to_metrics: + - system.filesystem.usage + - system.filesystem.utilization + - rename_attributes: + attribute_map: + device: system.device + direction: system.network.direction + protocol: network.protocol + state: system.network.state + apply_to_metrics: + - system.network.dropped + - system.network.packets + - system.network.errors + - system.network.io + - system.network.connections + - rename_attributes: + attribute_map: + status: system.processes.status + apply_to_metrics: + - system.processes.count 1.21.0: spans: changes: