Skip to content

Commit

Permalink
feat: new collector for number of processes/threads per user
Browse files Browse the repository at this point in the history
  • Loading branch information
tbelda-ems committed Feb 15, 2024
1 parent 73ead3f commit 66fd0f3
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 1 deletion.
7 changes: 7 additions & 0 deletions METRICS.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,10 @@
- io_stalled_avg60 (float)
- memory_waiting_avg60 (float) percentage over 60s
- memory_stalled_avg60 (float) percentage over 60s
- nodestat_userprocs
- tags:
- group
- user
- fields:
- processes (int)
- threads (int)
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Current collectors are:\
fc_host - fibrechannel metrics from /sys/class/fc_host/\
net - network interface metrics from /sys/class/net/\
pressure - metrics from /proc/pressure/\
userprocs - metrics for the number of processes and threads per user\
Metric timestamp precision will be 1s.

* Restart or reload Telegraf.
Expand All @@ -54,6 +55,8 @@ nodestat_fc_host,nodename=20000025ff1bab79,type=NPort\ (fabric\ via\ point-to-po
nodestat_net,interface=eno1,protocol=ethernet carrier=1i,flag_running=true,flag_up=true,operstate_code=0i,flag_lower_up=true,dormant=0i,duplex="full",link_mode=0i,operstate="up" 1662965695000000000
nodestat_net,interface=enp3s0f0,protocol=ethernet flag_running=true,flag_up=true,carrier=0i,duplex="unknown",link_mode=0i,operstate="down",operstate_code=5i,flag_lower_up=true,dormant=0i 1662965695000000000
nodestat_pressure cpu_waiting_avg60=0,io_waiting_avg60=21.1,io_stalled_avg60=20.97 1662965695000000000
nodestat_userprocs,group=root,user=root threads=173i,processes=124i 1662965695000000000
nodestat_userprocs,group=postfix,user=postfix processes=3i,threads=3i 1662965695000000000
```

# Metrics
Expand Down
4 changes: 3 additions & 1 deletion internal/collectors/collectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ type CollectorInfo struct {
var collectInfos []CollectorInfo

func init() {
var c []CollectorInfo
var c = make([]CollectorInfo, 0, 4)
var ci CollectorInfo

ci = CollectorInfo{"fc_host", true, "fibrechannels", fssys.GatherSysFcHostInfo}
Expand All @@ -34,6 +34,8 @@ func init() {
c = append(c, ci)
ci = CollectorInfo{"pressure", true, "pressure", fsproc.GatherProcPressureInfo}
c = append(c, ci)
ci = CollectorInfo{"userprocs", true, "processes per user", fsproc.GatherProcUserProcsInfo}
c = append(c, ci)
collectInfos = c
}

Expand Down
91 changes: 91 additions & 0 deletions internal/collectors/fsproc/userprocs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// fsproc functions show metrics from the Linux /proc filesystem using the Influx line protocol
//
// Author: Tesifonte Belda
// License: The MIT License (MIT)
//
// References:
// https://github.com/prometheus/node_exporter/tree/master/collector/pressure_linux.go

package fsproc

import (
"fmt"
"os"
"os/user"
"time"

"github.com/prometheus/procfs"
"github.com/tesibelda/lightmetric/metric"
)

// userInfo accumulates the per-user counters gathered by this collector.
type userInfo struct {
	// processes is the number of processes counted for the user;
	// threads is the sum of the thread counts of those processes.
	processes, threads int
}

// GatherProcUserProcsInfo prints to standard output, in Influx line protocol,
// one nodestat_userprocs metric per user with the number of processes and
// threads owned by that user, based on the per-process status and stat
// information obtained through procfs.
//
// Processes are grouped by the first entry of the status UIDs (presumably the
// real UID - confirm against the procfs documentation). Every metric carries
// the "user" tag (user name) and the "group" tag (name of the user's primary
// group, empty when the group cannot be resolved). UIDs that cannot be
// resolved to a non-empty user name are not reported.
//
// All metrics emitted by one call share the same timestamp, truncated to
// one-second precision.
//
// It returns an error when procfs cannot be opened, when the process list
// cannot be read, or when no process status could be read at all.
func GatherProcUserProcsInfo() error {
	fs, err := procfs.NewDefaultFS()
	if err != nil {
		return fmt.Errorf("failed to open procfs: %w", err)
	}

	procs, err := fs.AllProcs()
	if err != nil {
		return fmt.Errorf("unable to list all processes: %w", err)
	}

	// Accumulate process and thread counts keyed by UID.
	uidprocs := make(map[string]userInfo, 10)
	totalProcs := 0
	for _, proc := range procs {
		status, err := proc.NewStatus()
		if err != nil {
			// PIDs can vanish between getting the list and getting stats.
			continue
		}

		// A process whose stat data cannot be read is still counted as a
		// process; it simply contributes no threads to the total.
		threads := 0
		if stat, err := proc.Stat(); err == nil {
			threads = stat.NumThreads
		}

		uid := status.UIDs[0]
		// A UID seen for the first time yields the zero-value userInfo.
		info := uidprocs[uid]
		info.processes++
		info.threads += threads
		uidprocs[uid] = info
		totalProcs++
	}
	if totalProcs == 0 {
		return fmt.Errorf("unable to list any processes")
	}

	// Take the timestamp once so that every point of this collection pass
	// shares it, even if the user/group lookups below are slow.
	t := metric.TimeWithPrecision(time.Now(), time.Second)

	// The tag and field maps are reused for every metric; each one is
	// serialized before the maps are overwritten for the next user.
	fields := make(map[string]interface{}, 2)
	tags := make(map[string]string, 2)
	for uid, info := range uidprocs {
		usr, err := user.LookupId(uid)
		if err != nil || len(usr.Username) == 0 {
			// Best effort: UIDs without a resolvable user name are skipped.
			continue
		}

		// An unresolvable primary group results in an empty group tag.
		groupName := ""
		if grp, err := user.LookupGroupId(usr.Gid); err == nil {
			groupName = grp.Name
		}

		fields["processes"] = info.processes
		fields["threads"] = info.threads
		tags["user"] = usr.Username
		tags["group"] = groupName
		m := metric.New("nodestat_userprocs", tags, fields, t)
		fmt.Fprint(os.Stdout, m.String(metric.InfluxLp))
	}
	return nil
}

0 comments on commit 66fd0f3

Please sign in to comment.