Skip to content

Commit

Permalink
fix: pool detection and metrics gathering for ZFS >= 2.1.x
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronjwood committed Nov 27, 2021
1 parent 9bd0c61 commit 995a6c5
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 20 deletions.
13 changes: 13 additions & 0 deletions plugins/inputs/zfs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,16 @@ On Linux (reference: kstat accumulated time and queue length statistics):
- wcnt (integer, count)
- rcnt (integer, count)

For ZFS >= 2.1.x the format has changed significantly:

- zfs_pool
- writes (integer, count)
- nwritten (integer, bytes)
- reads (integer, count)
- nread (integer, bytes)
- nunlinks (integer, count)
- nunlinked (integer, count)

On FreeBSD:

- zfs_pool
Expand Down Expand Up @@ -229,6 +239,9 @@ On FreeBSD:
- Pool metrics (`zfs_pool`) will have the following tag:
- pool - with the name of the pool which the metrics are for.
- health - the health status of the pool. (FreeBSD only)
- pool - with the name of the pool which the metrics are for.
- health - the health status of the pool. (FreeBSD only)
- dataset - ZFS >= 2.1.x only. (Linux only)

- Dataset metrics (`zfs_dataset`) will have the following tag:
- dataset - with the name of the dataset which the metrics are for.
Expand Down
136 changes: 116 additions & 20 deletions plugins/inputs/zfs/zfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package zfs

import (
"errors"
"fmt"
"path/filepath"
"strconv"
Expand All @@ -14,22 +15,56 @@ import (
"github.com/influxdata/telegraf/plugins/inputs"
)

type metricsVersion uint8

const (
unknown metricsVersion = iota
v1
v2
)

type poolInfo struct {
name string
ioFilename string
version metricsVersion
}

func probeVersion(kstatPath string) (metricsVersion, []string, error) {
poolsDirs, err := filepath.Glob(fmt.Sprintf("%s/*/objset-*", kstatPath))

// From the docs: the only possible returned error is ErrBadPattern, when pattern is malformed.
// Because of this we need to determine how to fallback differently.
if err != nil {
return unknown, poolsDirs, err
}

if len(poolsDirs) > 0 {
return v2, poolsDirs, nil
}

// Fallback to the old kstat in case of an older ZFS version.
poolsDirs, err = filepath.Glob(fmt.Sprintf("%s/*/io", kstatPath))
if err != nil {
return unknown, poolsDirs, err
}

return v1, poolsDirs, nil
}

func getPools(kstatPath string) []poolInfo {
func getPools(kstatPath string) ([]poolInfo, error) {
pools := make([]poolInfo, 0)
poolsDirs, _ := filepath.Glob(kstatPath + "/*/io")
version, poolsDirs, err := probeVersion(kstatPath)
if err != nil {
return nil, err
}

for _, poolDir := range poolsDirs {
poolDirSplit := strings.Split(poolDir, "/")
pool := poolDirSplit[len(poolDirSplit)-2]
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir})
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir, version: version})
}

return pools
return pools, nil
}

func getTags(pools []poolInfo) map[string]string {
Expand All @@ -45,36 +80,97 @@ func getTags(pools []poolInfo) map[string]string {
return map[string]string{"pools": poolNames}
}

func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
lines, err := internal.ReadLines(pool.ioFilename)
if err != nil {
return err
}

if len(lines) != 3 {
return err
func gatherV1(lines []string) (map[string]interface{}, error) {
fileLines := 3
if len(lines) != fileLines {
return nil, errors.New("Expected lines in kstat does not match")
}

keys := strings.Fields(lines[1])
values := strings.Fields(lines[2])

keyCount := len(keys)

if keyCount != len(values) {
return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values)
return nil, fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values)
}

tag := map[string]string{"pool": pool.name}
fields := make(map[string]interface{})
for i := 0; i < keyCount; i++ {
value, err := strconv.ParseInt(values[i], 10, 64)
if err != nil {
return err
return nil, err
}

fields[keys[i]] = value
}
acc.AddFields("zfs_pool", fields, tag)

return fields, nil
}

// New way of collection. Each objset-* file in ZFS >= 2.1.x has a format looking like this:
// 36 1 0x01 7 2160 5214787391 73405258558961
// name type data
// dataset_name 7 rpool/ROOT/pve-1
// writes 4 409570
// nwritten 4 2063419969
// reads 4 22108699
// nread 4 63067280992
// nunlinks 4 13849
// nunlinked 4 13848
//
// For explanation of the first line's values see https://github.com/openzfs/zfs/blob/master/module/os/linux/spl/spl-kstat.c#L61
func gatherV2(lines []string, tags map[string]string) (map[string]interface{}, error) {
fileLines := 9
if len(lines) != fileLines {
return nil, errors.New("Expected lines in kstat does not match")
}

tags["dataset"] = strings.Fields(lines[2])[2]
keys := strings.Fields(lines[1])
values := strings.Fields(lines[2])
keyCount := len(keys)
if keyCount != len(values) {
return nil, fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values)
}

fields := make(map[string]interface{})
for i := 3; i < len(lines); i++ {
lineFields := strings.Fields(lines[i])
fieldName := lineFields[0]
fieldData := lineFields[2]
value, err := strconv.ParseInt(fieldData, 10, 64)
if err != nil {
return nil, err
}

fields[fieldName] = value
}

return fields, nil
}

func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
lines, err := internal.ReadLines(pool.ioFilename)
if err != nil {
return err
}

var fields map[string]interface{}
var gatherErr error
tags := map[string]string{"pool": pool.name}
switch pool.version {
case v1:
fields, gatherErr = gatherV1(lines)
case v2:
fields, gatherErr = gatherV2(lines, tags)
case unknown:
return errors.New("Unknown metrics version detected")
}

if gatherErr != nil {
return err
}

acc.AddFields("zfs_pool", fields, tags)
return nil
}

Expand All @@ -93,10 +189,10 @@ func (z *Zfs) Gather(acc telegraf.Accumulator) error {
kstatPath = "/proc/spl/kstat/zfs"
}

pools := getPools(kstatPath)
pools, err := getPools(kstatPath)
tags := getTags(pools)

if z.PoolMetrics {
if z.PoolMetrics && err == nil {
for _, pool := range pools {
err := gatherPoolStats(pool, acc)
if err != nil {
Expand Down
34 changes: 34 additions & 0 deletions plugins/inputs/zfs/zfs_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ const poolIoContents = `11 3 0x00 1 80 2225326830828 32953476980628
nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt
1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0
`
const objsetContents = `36 1 0x01 7 2160 5214787391 74985931356512
name type data
dataset_name 7 HOME
writes 4 978
nwritten 4 6450688
reads 4 22
nread 4 1884160
nunlinks 4 14148
nunlinked 4 14147
`
const zilContents = `7 1 0x01 14 672 34118481334 437444452158445
name type data
zil_commit_count 4 77
Expand Down Expand Up @@ -219,6 +229,19 @@ func TestZfsPoolMetrics(t *testing.T) {

acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)

err = os.WriteFile(testKstatPath+"/HOME/objset-0x20a", []byte(objsetContents), 0644)
require.NoError(t, err)

acc.Metrics = nil

err = z.Gather(&acc)
require.NoError(t, err)

tags["dataset"] = "HOME"

poolMetrics = getPoolMetricsNewFormat()
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)

err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err)
}
Expand Down Expand Up @@ -477,3 +500,14 @@ func getPoolMetrics() map[string]interface{} {
"rcnt": int64(0),
}
}

func getPoolMetricsNewFormat() map[string]interface{} {
return map[string]interface{}{
"nread": int64(1884160),
"nunlinked": int64(14147),
"nunlinks": int64(14148),
"nwritten": int64(6450688),
"reads": int64(22),
"writes": int64(978),
}
}

0 comments on commit 995a6c5

Please sign in to comment.