Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: pool detection and metrics gathering for ZFS >= 2.1.x #10099

Merged
merged 3 commits into from
Dec 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions plugins/inputs/zfs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,16 @@ On Linux (reference: kstat accumulated time and queue length statistics):
- wcnt (integer, count)
- rcnt (integer, count)

For ZFS >= 2.1.x the format has changed significantly:

- zfs_pool
- writes (integer, count)
- nwritten (integer, bytes)
- reads (integer, count)
- nread (integer, bytes)
- nunlinks (integer, count)
- nunlinked (integer, count)

On FreeBSD:

- zfs_pool
Expand Down Expand Up @@ -229,6 +239,7 @@ On FreeBSD:
- Pool metrics (`zfs_pool`) will have the following tag:
- pool - with the name of the pool which the metrics are for.
- health - the health status of the pool. (FreeBSD only)
- dataset - ZFS >= 2.1.x only. (Linux only)

- Dataset metrics (`zfs_dataset`) will have the following tag:
- dataset - with the name of the dataset which the metrics are for.
Expand Down
140 changes: 119 additions & 21 deletions plugins/inputs/zfs/zfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package zfs

import (
"errors"
"fmt"
"path/filepath"
"strconv"
Expand All @@ -14,22 +15,56 @@ import (
"github.com/influxdata/telegraf/plugins/inputs"
)

type metricsVersion uint8

const (
unknown metricsVersion = iota
v1
v2
)

type poolInfo struct {
name string
ioFilename string
version metricsVersion
}

func probeVersion(kstatPath string) (metricsVersion, []string, error) {
poolsDirs, err := filepath.Glob(fmt.Sprintf("%s/*/objset-*", kstatPath))

// From the docs: the only possible returned error is ErrBadPattern, when pattern is malformed.
// Because of this we need to determine how to fallback differently.
if err != nil {
return unknown, poolsDirs, err
}

if len(poolsDirs) > 0 {
return v2, poolsDirs, nil
}

// Fallback to the old kstat in case of an older ZFS version.
poolsDirs, err = filepath.Glob(fmt.Sprintf("%s/*/io", kstatPath))
if err != nil {
return unknown, poolsDirs, err
}

return v1, poolsDirs, nil
}

func getPools(kstatPath string) []poolInfo {
func getPools(kstatPath string) ([]poolInfo, error) {
pools := make([]poolInfo, 0)
poolsDirs, _ := filepath.Glob(kstatPath + "/*/io")
version, poolsDirs, err := probeVersion(kstatPath)
if err != nil {
return nil, err
}

for _, poolDir := range poolsDirs {
poolDirSplit := strings.Split(poolDir, "/")
pool := poolDirSplit[len(poolDirSplit)-2]
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir})
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir, version: version})
}

return pools
return pools, nil
}

func getTags(pools []poolInfo) map[string]string {
Expand All @@ -45,36 +80,99 @@ func getTags(pools []poolInfo) map[string]string {
return map[string]string{"pools": poolNames}
}

func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
lines, err := internal.ReadLines(pool.ioFilename)
if err != nil {
return err
}

if len(lines) != 3 {
return err
func gather(lines []string, fileLines int) ([]string, []string, error) {
if len(lines) != fileLines {
return nil, nil, errors.New("expected lines in kstat does not match")
}

keys := strings.Fields(lines[1])
values := strings.Fields(lines[2])
if len(keys) != len(values) {
return nil, nil, fmt.Errorf("key and value count don't match Keys:%v Values:%v", keys, values)
}

keyCount := len(keys)
return keys, values, nil
}

if keyCount != len(values) {
return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values)
func gatherV1(lines []string) (map[string]interface{}, error) {
fileLines := 3
keys, values, err := gather(lines, fileLines)
if err != nil {
return nil, err
}

tag := map[string]string{"pool": pool.name}
fields := make(map[string]interface{})
for i := 0; i < keyCount; i++ {
for i := 0; i < len(keys); i++ {
value, err := strconv.ParseInt(values[i], 10, 64)
if err != nil {
return err
return nil, err
}

fields[keys[i]] = value
}
acc.AddFields("zfs_pool", fields, tag)

return fields, nil
}

// New way of collection. Each objset-* file in ZFS >= 2.1.x has a format looking like this:
// 36 1 0x01 7 2160 5214787391 73405258558961
// name type data
// dataset_name 7 rpool/ROOT/pve-1
// writes 4 409570
// nwritten 4 2063419969
// reads 4 22108699
// nread 4 63067280992
// nunlinks 4 13849
// nunlinked 4 13848
//
// For explanation of the first line's values see https://github.com/openzfs/zfs/blob/master/module/os/linux/spl/spl-kstat.c#L61
func gatherV2(lines []string, tags map[string]string) (map[string]interface{}, error) {
fileLines := 9
_, _, err := gather(lines, fileLines)
if err != nil {
return nil, err
}

tags["dataset"] = strings.Fields(lines[2])[2]
fields := make(map[string]interface{})
for i := 3; i < len(lines); i++ {
lineFields := strings.Fields(lines[i])
fieldName := lineFields[0]
fieldData := lineFields[2]
value, err := strconv.ParseInt(fieldData, 10, 64)
if err != nil {
return nil, err
}

fields[fieldName] = value
}

return fields, nil
}

func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
lines, err := internal.ReadLines(pool.ioFilename)
if err != nil {
return err
}

var fields map[string]interface{}
var gatherErr error
tags := map[string]string{"pool": pool.name}
switch pool.version {
case v1:
fields, gatherErr = gatherV1(lines)
case v2:
fields, gatherErr = gatherV2(lines, tags)
case unknown:
return errors.New("Unknown metrics version detected")
}

if gatherErr != nil {
return err
}

acc.AddFields("zfs_pool", fields, tags)
return nil
}

Expand All @@ -93,10 +191,10 @@ func (z *Zfs) Gather(acc telegraf.Accumulator) error {
kstatPath = "/proc/spl/kstat/zfs"
}

pools := getPools(kstatPath)
pools, err := getPools(kstatPath)
tags := getTags(pools)

if z.PoolMetrics {
if z.PoolMetrics && err == nil {
srebhan marked this conversation as resolved.
Show resolved Hide resolved
for _, pool := range pools {
err := gatherPoolStats(pool, acc)
if err != nil {
Expand Down
34 changes: 34 additions & 0 deletions plugins/inputs/zfs/zfs_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ const poolIoContents = `11 3 0x00 1 80 2225326830828 32953476980628
nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt
1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0
`
const objsetContents = `36 1 0x01 7 2160 5214787391 74985931356512
name type data
dataset_name 7 HOME
writes 4 978
nwritten 4 6450688
reads 4 22
nread 4 1884160
nunlinks 4 14148
nunlinked 4 14147
`
const zilContents = `7 1 0x01 14 672 34118481334 437444452158445
name type data
zil_commit_count 4 77
Expand Down Expand Up @@ -219,6 +229,19 @@ func TestZfsPoolMetrics(t *testing.T) {

acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)

err = os.WriteFile(testKstatPath+"/HOME/objset-0x20a", []byte(objsetContents), 0644)
require.NoError(t, err)

acc.Metrics = nil

err = z.Gather(&acc)
require.NoError(t, err)

tags["dataset"] = "HOME"

poolMetrics = getPoolMetricsNewFormat()
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)

err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}
Expand Down Expand Up @@ -477,3 +500,14 @@ func getPoolMetrics() map[string]interface{} {
"rcnt": int64(0),
}
}

func getPoolMetricsNewFormat() map[string]interface{} {
return map[string]interface{}{
"nread": int64(1884160),
"nunlinked": int64(14147),
"nunlinks": int64(14148),
"nwritten": int64(6450688),
"reads": int64(22),
"writes": int64(978),
}
}