Skip to content

Commit

Permalink
add RTNL version of netclass collector (prometheus#2492)
Browse files Browse the repository at this point in the history
* update rtnetlink package to v1.2.3
* add RTNL version of netclass collector that have all the metrics that netdev collector provides, too.

Signed-off-by: Haoyu Sun <hasun@redhat.com>
  • Loading branch information
raptorsun authored Nov 21, 2022
1 parent 26e82af commit bba0e2e
Show file tree
Hide file tree
Showing 5 changed files with 328 additions and 70 deletions.
100 changes: 30 additions & 70 deletions collector/netclass_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,91 +95,51 @@ func (c *netClassCollector) Update(ch chan<- prometheus.Metric) error {

ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, ifaceInfo.Name, ifaceInfo.Address, ifaceInfo.Broadcast, ifaceInfo.Duplex, ifaceInfo.OperState, ifaceInfo.IfAlias)

if ifaceInfo.AddrAssignType != nil {
pushMetric(ch, c.subsystem, "address_assign_type", *ifaceInfo.AddrAssignType, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.Carrier != nil {
pushMetric(ch, c.subsystem, "carrier", *ifaceInfo.Carrier, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.CarrierChanges != nil {
pushMetric(ch, c.subsystem, "carrier_changes_total", *ifaceInfo.CarrierChanges, ifaceInfo.Name, prometheus.CounterValue)
}

if ifaceInfo.CarrierUpCount != nil {
pushMetric(ch, c.subsystem, "carrier_up_changes_total", *ifaceInfo.CarrierUpCount, ifaceInfo.Name, prometheus.CounterValue)
}

if ifaceInfo.CarrierDownCount != nil {
pushMetric(ch, c.subsystem, "carrier_down_changes_total", *ifaceInfo.CarrierDownCount, ifaceInfo.Name, prometheus.CounterValue)
}

if ifaceInfo.DevID != nil {
pushMetric(ch, c.subsystem, "device_id", *ifaceInfo.DevID, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.Dormant != nil {
pushMetric(ch, c.subsystem, "dormant", *ifaceInfo.Dormant, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.Flags != nil {
pushMetric(ch, c.subsystem, "flags", *ifaceInfo.Flags, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.IfIndex != nil {
pushMetric(ch, c.subsystem, "iface_id", *ifaceInfo.IfIndex, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.IfLink != nil {
pushMetric(ch, c.subsystem, "iface_link", *ifaceInfo.IfLink, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.LinkMode != nil {
pushMetric(ch, c.subsystem, "iface_link_mode", *ifaceInfo.LinkMode, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.MTU != nil {
pushMetric(ch, c.subsystem, "mtu_bytes", *ifaceInfo.MTU, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.NameAssignType != nil {
pushMetric(ch, c.subsystem, "name_assign_type", *ifaceInfo.NameAssignType, ifaceInfo.Name, prometheus.GaugeValue)
}

if ifaceInfo.NetDevGroup != nil {
pushMetric(ch, c.subsystem, "net_dev_group", *ifaceInfo.NetDevGroup, ifaceInfo.Name, prometheus.GaugeValue)
}
pushMetric(ch, c.getFieldDesc("address_assign_type"), "address_assign_type", ifaceInfo.AddrAssignType, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("carrier"), "carrier", ifaceInfo.Carrier, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("carrier_changes_total"), "carrier_changes_total", ifaceInfo.CarrierChanges, prometheus.CounterValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("carrier_up_changes_total"), "carrier_up_changes_total", ifaceInfo.CarrierUpCount, prometheus.CounterValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("carrier_down_changes_total"), "carrier_down_changes_total", ifaceInfo.CarrierDownCount, prometheus.CounterValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("device_id"), "device_id", ifaceInfo.DevID, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("dormant"), "dormant", ifaceInfo.Dormant, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("flags"), "flags", ifaceInfo.Flags, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("iface_id"), "iface_id", ifaceInfo.IfIndex, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("iface_link"), "iface_link", ifaceInfo.IfLink, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("iface_link_mode"), "iface_link_mode", ifaceInfo.LinkMode, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("mtu_bytes"), "mtu_bytes", ifaceInfo.MTU, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("name_assign_type"), "name_assign_type", ifaceInfo.NameAssignType, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("net_dev_group"), "net_dev_group", ifaceInfo.NetDevGroup, prometheus.GaugeValue, ifaceInfo.Name)

if ifaceInfo.Speed != nil {
// Some devices return -1 if the speed is unknown.
if *ifaceInfo.Speed >= 0 || !*netclassInvalidSpeed {
speedBytes := int64(*ifaceInfo.Speed * 1000 * 1000 / 8)
pushMetric(ch, c.subsystem, "speed_bytes", speedBytes, ifaceInfo.Name, prometheus.GaugeValue)
pushMetric(ch, c.getFieldDesc("speed_bytes"), "speed_bytes", speedBytes, prometheus.GaugeValue, ifaceInfo.Name)
}
}

if ifaceInfo.TxQueueLen != nil {
pushMetric(ch, c.subsystem, "transmit_queue_length", *ifaceInfo.TxQueueLen, ifaceInfo.Name, prometheus.GaugeValue)
}
pushMetric(ch, c.getFieldDesc("transmit_queue_length"), "transmit_queue_length", ifaceInfo.TxQueueLen, prometheus.GaugeValue, ifaceInfo.Name)
pushMetric(ch, c.getFieldDesc("protocol_type"), "protocol_type", ifaceInfo.Type, prometheus.GaugeValue, ifaceInfo.Name)

if ifaceInfo.Type != nil {
pushMetric(ch, c.subsystem, "protocol_type", *ifaceInfo.Type, ifaceInfo.Name, prometheus.GaugeValue)
}
}

return nil
}

func pushMetric(ch chan<- prometheus.Metric, subsystem string, name string, value int64, ifaceName string, valueType prometheus.ValueType) {
fieldDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, name),
fmt.Sprintf("%s value of /sys/class/net/<iface>.", name),
[]string{"device"},
nil,
)
func (c *netClassCollector) getFieldDesc(name string) *prometheus.Desc {
fieldDesc, exists := c.metricDescs[name]

if !exists {
fieldDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, c.subsystem, name),
fmt.Sprintf("%s value of /sys/class/net/<iface>.", name),
[]string{"device"},
nil,
)
c.metricDescs[name] = fieldDesc
}

ch <- prometheus.MustNewConstMetric(fieldDesc, valueType, float64(value), ifaceName)
return fieldDesc
}

func (c *netClassCollector) getNetClassInfo() (sysfs.NetClass, error) {
Expand Down
229 changes: 229 additions & 0 deletions collector/netclass_rtnl_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
// Copyright 2022 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !nonetclass && linux
// +build !nonetclass,linux

package collector

import (
"errors"
"fmt"
"io/fs"
"regexp"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/jsimonetti/rtnetlink"
"github.com/mdlayher/ethtool"
"github.com/prometheus/client_golang/prometheus"
"gopkg.in/alecthomas/kingpin.v2"
)

var (
netclassRTNLIgnoredDevices = kingpin.Flag("collector.netclass_rtnl.ignored-devices", "Regexp of net devices to ignore for netclass_rtnl collector.").Default("^$").String()
netclassRTNLWithStats = kingpin.Flag("collector.netclass_rtnl.with-stats", "Expose the statistics for each network device, replacing netdev collector.").Bool()
operstateStr = []string{
"unknown", "notpresent", "down", "lowerlayerdown", "testing",
"dormant", "up",
}
)

type netClassRTNLCollector struct {
subsystem string
ignoredDevicesPattern *regexp.Regexp
metricDescs map[string]*prometheus.Desc
logger log.Logger
}

func init() {
registerCollector("netclass_rtnl", defaultDisabled, NewNetClassRTNLCollector)
}

// NewNetClassCollector returns a new Collector exposing network class stats.
func NewNetClassRTNLCollector(logger log.Logger) (Collector, error) {
pattern := regexp.MustCompile(*netclassRTNLIgnoredDevices)
return &netClassRTNLCollector{
subsystem: "network",
ignoredDevicesPattern: pattern,
metricDescs: map[string]*prometheus.Desc{},
logger: logger,
}, nil
}

func (c *netClassRTNLCollector) Update(ch chan<- prometheus.Metric) error {
linkModes := make(map[string]*ethtool.LinkMode)
lms, err := c.getLinkModes()
if err != nil {
if !errors.Is(errors.Unwrap(err), fs.ErrNotExist) {
return fmt.Errorf("could not get link modes: %w", err)
}
level.Info(c.logger).Log("msg", "ETHTOOL netlink interface unavailable, duplex and linkspeed are not scraped.")
} else {
for _, lm := range lms {
if c.ignoredDevicesPattern.MatchString(lm.Interface.Name) {
continue
}
if lm.SpeedMegabits >= 0 {
speedBytes := uint64(lm.SpeedMegabits * 1000 * 1000 / 8)
pushMetric(ch, c.getFieldDesc("speed_bytes"), "speed_bytes", speedBytes, prometheus.GaugeValue, lm.Interface.Name)
}
linkModes[lm.Interface.Name] = lm
}
}

lMsgs, err := c.getNetClassInfoRTNL()
if err != nil {
return fmt.Errorf("could not get net class info: %w", err)
}
for _, msg := range lMsgs {
if c.ignoredDevicesPattern.MatchString(msg.Attributes.Name) {
continue
}
upDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, c.subsystem, "up"),
"Value is 1 if operstate is 'up', 0 otherwise.",
[]string{"device"},
nil,
)
upValue := 0.0
if msg.Attributes.OperationalState == rtnetlink.OperStateUp {
upValue = 1.0
}
ch <- prometheus.MustNewConstMetric(upDesc, prometheus.GaugeValue, upValue, msg.Attributes.Name)

infoDesc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, c.subsystem, "info"),
"Non-numeric data of <iface>, value is always 1.",
[]string{"device", "address", "broadcast", "duplex", "operstate", "ifalias"},
nil,
)
infoValue := 1.0

var ifalias = ""
if msg.Attributes.Alias != nil {
ifalias = *msg.Attributes.Alias
}

duplex := ""
lm, lmExists := linkModes[msg.Attributes.Name]
if lmExists {
duplex = lm.Duplex.String()
}

ch <- prometheus.MustNewConstMetric(infoDesc, prometheus.GaugeValue, infoValue, msg.Attributes.Name, msg.Attributes.Address.String(), msg.Attributes.Broadcast.String(), duplex, operstateStr[int(msg.Attributes.OperationalState)], ifalias)

pushMetric(ch, c.getFieldDesc("carrier"), "carrier", msg.Attributes.Carrier, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("carrier_changes_total"), "carrier_changes_total", msg.Attributes.CarrierChanges, prometheus.CounterValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("carrier_up_changes_total"), "carrier_up_changes_total", msg.Attributes.CarrierUpCount, prometheus.CounterValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("carrier_down_changes_total"), "carrier_down_changes_total", msg.Attributes.CarrierDownCount, prometheus.CounterValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("flags"), "flags", msg.Flags, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("iface_id"), "iface_id", msg.Index, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("iface_link_mode"), "iface_link_mode", msg.Attributes.LinkMode, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("dormant"), "dormant", msg.Attributes.LinkMode, prometheus.GaugeValue, msg.Attributes.Name)

// kernel logic: IFLA_LINK attribute will be ignore when ifindex is the same as iflink
// (dev->ifindex != dev_get_iflink(dev) && nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))
// As interface ID is never 0, we assume msg.Attributes.Type 0 means iflink is omitted in RTM_GETLINK response.
if msg.Attributes.Type > 0 {
pushMetric(ch, c.getFieldDesc("iface_link"), "iface_link", msg.Attributes.Type, prometheus.GaugeValue, msg.Attributes.Name)
} else {
pushMetric(ch, c.getFieldDesc("iface_link"), "iface_link", msg.Index, prometheus.GaugeValue, msg.Attributes.Name)
}

pushMetric(ch, c.getFieldDesc("mtu_bytes"), "mtu_bytes", msg.Attributes.MTU, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("net_dev_group"), "net_dev_group", msg.Attributes.NetDevGroup, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_queue_length"), "transmit_queue_length", msg.Attributes.TxQueueLen, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("protocol_type"), "protocol_type", msg.Type, prometheus.GaugeValue, msg.Attributes.Name)

// skip statistics if argument collector.netclass_rtnl.with-stats is false or statistics are unavailable.
if netclassRTNLWithStats == nil || !*netclassRTNLWithStats || msg.Attributes.Stats64 == nil {
continue
}

pushMetric(ch, c.getFieldDesc("receive_packets_total"), "receive_packets_total", msg.Attributes.Stats64.RXPackets, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_packets_total"), "transmit_packets_total", msg.Attributes.Stats64.TXPackets, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_bytes_total"), "receive_bytes_total", msg.Attributes.Stats64.RXBytes, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_bytes_total"), "transmit_bytes_total", msg.Attributes.Stats64.TXBytes, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_errors_total"), "receive_errors_total", msg.Attributes.Stats64.RXErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_errors_total"), "transmit_errors_total", msg.Attributes.Stats64.TXErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_dropped_total"), "receive_dropped_total", msg.Attributes.Stats64.RXDropped, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_dropped_total"), "transmit_dropped_total", msg.Attributes.Stats64.TXDropped, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("multicast_total"), "multicast_total", msg.Attributes.Stats64.Multicast, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("collisions_total"), "collisions_total", msg.Attributes.Stats64.Collisions, prometheus.GaugeValue, msg.Attributes.Name)

// detailed rx_errors
pushMetric(ch, c.getFieldDesc("receive_length_errors_total"), "receive_length_errors_total", msg.Attributes.Stats64.RXLengthErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_over_errors_total"), "receive_over_errors_total", msg.Attributes.Stats64.RXOverErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_crc_errors_total"), "receive_crc_errors_total", msg.Attributes.Stats64.RXCRCErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_frame_errors_total"), "receive_frame_errors_total", msg.Attributes.Stats64.RXFrameErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_fifo_errors_total"), "receive_fifo_errors_total", msg.Attributes.Stats64.RXFIFOErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_missed_errors_total"), "receive_missed_errors_total", msg.Attributes.Stats64.RXMissedErrors, prometheus.GaugeValue, msg.Attributes.Name)

// detailed tx_errors
pushMetric(ch, c.getFieldDesc("transmit_aborted_errors_total"), "transmit_aborted_errors_total", msg.Attributes.Stats64.TXAbortedErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_carrier_errors_total"), "transmit_carrier_errors_total", msg.Attributes.Stats64.TXCarrierErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_fifo_errors_total"), "transmit_fifo_errors_total", msg.Attributes.Stats64.TXFIFOErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_heartbeat_errors_total"), "transmit_heartbeat_errors_total", msg.Attributes.Stats64.TXHeartbeatErrors, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_window_errors_total"), "transmit_window_errors_total", msg.Attributes.Stats64.TXWindowErrors, prometheus.GaugeValue, msg.Attributes.Name)

// for cslip etc
pushMetric(ch, c.getFieldDesc("receive_compressed_total"), "receive_compressed_total", msg.Attributes.Stats64.RXCompressed, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("transmit_compressed_total"), "transmit_compressed_total", msg.Attributes.Stats64.TXCompressed, prometheus.GaugeValue, msg.Attributes.Name)
pushMetric(ch, c.getFieldDesc("receive_nohandler_total"), "receive_nohandler_total", msg.Attributes.Stats64.RXNoHandler, prometheus.GaugeValue, msg.Attributes.Name)

}

return nil
}

func (c *netClassRTNLCollector) getFieldDesc(name string) *prometheus.Desc {
fieldDesc, exists := c.metricDescs[name]

if !exists {
fieldDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, c.subsystem, name),
fmt.Sprintf("Network device property %s.", name),
[]string{"device"},
nil,
)
c.metricDescs[name] = fieldDesc
}

return fieldDesc
}

func (c *netClassRTNLCollector) getNetClassInfoRTNL() ([]rtnetlink.LinkMessage, error) {
conn, err := rtnetlink.Dial(nil)
if err != nil {
return nil, err
}
defer conn.Close()

lMsgs, err := conn.Link.List()

return lMsgs, err

}

func (c *netClassRTNLCollector) getLinkModes() ([]*ethtool.LinkMode, error) {
conn, err := ethtool.New()
if err != nil {
return nil, err
}
defer conn.Close()

lms, err := conn.LinkModes()

return lms, err
}
Loading

0 comments on commit bba0e2e

Please sign in to comment.