diff --git a/plugins/inputs/zfs/README.md b/plugins/inputs/zfs/README.md index 77b101915bbe6..b77cbc3a6647d 100644 --- a/plugins/inputs/zfs/README.md +++ b/plugins/inputs/zfs/README.md @@ -202,6 +202,16 @@ On Linux (reference: kstat accumulated time and queue length statistics): - wcnt (integer, count) - rcnt (integer, count) +For ZFS >= 2.1.x the format has changed significantly: + +- zfs_pool + - writes (integer, count) + - nwritten (integer, bytes) + - reads (integer, count) + - nread (integer, bytes) + - nunlinks (integer, count) + - nunlinked (integer, count) + On FreeBSD: - zfs_pool @@ -229,6 +239,7 @@ On FreeBSD: - Pool metrics (`zfs_pool`) will have the following tag: - pool - with the name of the pool which the metrics are for. - health - the health status of the pool. (FreeBSD only) + - dataset - ZFS >= 2.1.x only. (Linux only) - Dataset metrics (`zfs_dataset`) will have the following tag: - dataset - with the name of the dataset which the metrics are for. diff --git a/plugins/inputs/zfs/zfs_linux.go b/plugins/inputs/zfs/zfs_linux.go index ac3ca6ee81d23..0b625ccebd30a 100644 --- a/plugins/inputs/zfs/zfs_linux.go +++ b/plugins/inputs/zfs/zfs_linux.go @@ -4,6 +4,7 @@ package zfs import ( + "errors" "fmt" "path/filepath" "strconv" @@ -14,22 +15,56 @@ import ( "github.com/influxdata/telegraf/plugins/inputs" ) +type metricsVersion uint8 + +const ( + unknown metricsVersion = iota + v1 + v2 +) + type poolInfo struct { name string ioFilename string + version metricsVersion +} + +func probeVersion(kstatPath string) (metricsVersion, []string, error) { + poolsDirs, err := filepath.Glob(fmt.Sprintf("%s/*/objset-*", kstatPath)) + + // From the docs: the only possible returned error is ErrBadPattern, when pattern is malformed. + // Because of this we need to determine how to fallback differently. 
+ if err != nil { + return unknown, poolsDirs, err + } + + if len(poolsDirs) > 0 { + return v2, poolsDirs, nil + } + + // Fallback to the old kstat in case of an older ZFS version. + poolsDirs, err = filepath.Glob(fmt.Sprintf("%s/*/io", kstatPath)) + if err != nil { + return unknown, poolsDirs, err + } + + return v1, poolsDirs, nil } -func getPools(kstatPath string) []poolInfo { +func getPools(kstatPath string) ([]poolInfo, error) { pools := make([]poolInfo, 0) - poolsDirs, _ := filepath.Glob(kstatPath + "/*/io") + version, poolsDirs, err := probeVersion(kstatPath) + if err != nil { + return nil, err + } for _, poolDir := range poolsDirs { poolDirSplit := strings.Split(poolDir, "/") pool := poolDirSplit[len(poolDirSplit)-2] - pools = append(pools, poolInfo{name: pool, ioFilename: poolDir}) + pools = append(pools, poolInfo{name: pool, ioFilename: poolDir, version: version}) } - return pools + return pools, nil } func getTags(pools []poolInfo) map[string]string { @@ -45,36 +80,99 @@ func getTags(pools []poolInfo) map[string]string { return map[string]string{"pools": poolNames} } -func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error { - lines, err := internal.ReadLines(pool.ioFilename) - if err != nil { - return err - } - - if len(lines) != 3 { - return err +func gather(lines []string, fileLines int) ([]string, []string, error) { + if len(lines) != fileLines { + return nil, nil, errors.New("Expected lines in kstat does not match") } keys := strings.Fields(lines[1]) values := strings.Fields(lines[2]) + if len(keys) != len(values) { + return nil, nil, fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values) + } - keyCount := len(keys) + return keys, values, nil +} - if keyCount != len(values) { - return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values) +func gatherV1(lines []string) (map[string]interface{}, error) { + fileLines := 3 + keys, values, err := gather(lines, fileLines) + if err != nil { + return 
nil, err
+	}
 
-	tag := map[string]string{"pool": pool.name}
 	fields := make(map[string]interface{})
-	for i := 0; i < keyCount; i++ {
+	for i := 0; i < len(keys); i++ {
 		value, err := strconv.ParseInt(values[i], 10, 64)
 		if err != nil {
-			return err
+			return nil, err
 		}
+
 		fields[keys[i]] = value
 	}
 
-	acc.AddFields("zfs_pool", fields, tag)
+	return fields, nil
+}
+
+// New way of collection. Each objset-* file in ZFS >= 2.1.x has a format looking like this:
+// 36 1 0x01 7 2160 5214787391 73405258558961
+// name type data
+// dataset_name 7 rpool/ROOT/pve-1
+// writes 4 409570
+// nwritten 4 2063419969
+// reads 4 22108699
+// nread 4 63067280992
+// nunlinks 4 13849
+// nunlinked 4 13848
+//
+// For explanation of the first line's values see https://github.com/openzfs/zfs/blob/master/module/os/linux/spl/spl-kstat.c#L61
+func gatherV2(lines []string, tags map[string]string) (map[string]interface{}, error) {
+	fileLines := 9
+	_, _, err := gather(lines, fileLines)
+	if err != nil {
+		return nil, err
+	}
+
+	tags["dataset"] = strings.Fields(lines[2])[2]
+	fields := make(map[string]interface{})
+	for i := 3; i < len(lines); i++ {
+		lineFields := strings.Fields(lines[i])
+		fieldName := lineFields[0]
+		fieldData := lineFields[2]
+		value, err := strconv.ParseInt(fieldData, 10, 64)
+		if err != nil {
+			return nil, err
+		}
+
+		fields[fieldName] = value
+	}
+
+	return fields, nil
+}
+
+func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
+	lines, err := internal.ReadLines(pool.ioFilename)
+	if err != nil {
+		return err
+	}
+
+	var fields map[string]interface{}
+	var gatherErr error
+	tags := map[string]string{"pool": pool.name}
+	switch pool.version {
+	case v1:
+		fields, gatherErr = gatherV1(lines)
+	case v2:
+		fields, gatherErr = gatherV2(lines, tags)
+	case unknown:
+		return errors.New("Unknown metrics version detected")
+	}
+
+	if gatherErr != nil {
+		return gatherErr
+	}
+
+	acc.AddFields("zfs_pool", fields, tags)
 	return nil
 }
 
@@ -93,10 +191,10 @@ func (z *Zfs) 
Gather(acc telegraf.Accumulator) error { kstatPath = "/proc/spl/kstat/zfs" } - pools := getPools(kstatPath) + pools, err := getPools(kstatPath) tags := getTags(pools) - if z.PoolMetrics { + if z.PoolMetrics && err == nil { for _, pool := range pools { err := gatherPoolStats(pool, acc) if err != nil { diff --git a/plugins/inputs/zfs/zfs_linux_test.go b/plugins/inputs/zfs/zfs_linux_test.go index b844759eaffd1..3fe23e93a470e 100644 --- a/plugins/inputs/zfs/zfs_linux_test.go +++ b/plugins/inputs/zfs/zfs_linux_test.go @@ -119,6 +119,16 @@ const poolIoContents = `11 3 0x00 1 80 2225326830828 32953476980628 nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt 1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0 ` +const objsetContents = `36 1 0x01 7 2160 5214787391 74985931356512 +name type data +dataset_name 7 HOME +writes 4 978 +nwritten 4 6450688 +reads 4 22 +nread 4 1884160 +nunlinks 4 14148 +nunlinked 4 14147 +` const zilContents = `7 1 0x01 14 672 34118481334 437444452158445 name type data zil_commit_count 4 77 @@ -219,6 +229,19 @@ func TestZfsPoolMetrics(t *testing.T) { acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags) + err = os.WriteFile(testKstatPath+"/HOME/objset-0x20a", []byte(objsetContents), 0644) + require.NoError(t, err) + + acc.Metrics = nil + + err = z.Gather(&acc) + require.NoError(t, err) + + tags["dataset"] = "HOME" + + poolMetrics = getPoolMetricsNewFormat() + acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags) + err = os.RemoveAll(os.TempDir() + "/telegraf") require.NoError(t, err) } @@ -477,3 +500,14 @@ func getPoolMetrics() map[string]interface{} { "rcnt": int64(0), } } + +func getPoolMetricsNewFormat() map[string]interface{} { + return map[string]interface{}{ + "nread": int64(1884160), + "nunlinked": int64(14147), + "nunlinks": int64(14148), + "nwritten": int64(6450688), + "reads": int64(22), + "writes": int64(978), + } +}