From 6f3cd05935a2faaf14d16c2e643f54e6f9134c0f Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Tue, 7 Nov 2023 15:52:04 +0400 Subject: [PATCH] refactor: update packet capture to use 'afpacket' interface First of all, this interface is way more performant than the `pcap` interface. It is Linux-specific, but we don't care in Talos Linux :) Second, this drops the dependency of `machined` on the `gopacket/layers` package, which has huge issues with memory allocations and startup time. This cuts around 20MiB of process RSS for all Talos processes. (`talosctl` still requires this `gopacket/layers` library for decoding packets). Fixes #7880 Signed-off-by: Andrey Smirnov --- .../server/v1alpha1/v1alpha1_server.go | 59 +++++----- internal/pkg/pcap/pcap.go | 109 ++++++++++++++++++ 2 files changed, 140 insertions(+), 28 deletions(-) create mode 100644 internal/pkg/pcap/pcap.go diff --git a/internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go b/internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go index 75d5b7baae..4f0cf7525b 100644 --- a/internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go +++ b/internal/app/machined/internal/server/v1alpha1/v1alpha1_server.go @@ -29,9 +29,7 @@ import ( "github.com/cosi-project/runtime/pkg/state/protobuf/server" "github.com/google/go-cmp/cmp" "github.com/google/uuid" - "github.com/gopacket/gopacket" - "github.com/gopacket/gopacket/layers" - "github.com/gopacket/gopacket/pcapgo" + "github.com/gopacket/gopacket/afpacket" multierror "github.com/hashicorp/go-multierror" "github.com/nberlee/go-netstat/netstat" "github.com/prometheus/procfs" @@ -66,6 +64,7 @@ import ( "github.com/siderolabs/talos/internal/pkg/install" "github.com/siderolabs/talos/internal/pkg/meta" "github.com/siderolabs/talos/internal/pkg/miniprocfs" + "github.com/siderolabs/talos/internal/pkg/pcap" "github.com/siderolabs/talos/pkg/archiver" "github.com/siderolabs/talos/pkg/chunker" "github.com/siderolabs/talos/pkg/chunker/stream" @@ -2169,13 +2168,13 
@@ func (s *Server) PacketCapture(in *machine.PacketCaptureRequest, srv machine.Mac return err } - var linkType layers.LinkType + var linkType pcap.LinkType switch linkInfo.TypedSpec().Type { //nolint:exhaustive case nethelpers.LinkEther, nethelpers.LinkLoopbck: - linkType = layers.LinkTypeEthernet + linkType = pcap.LinkTypeEthernet case nethelpers.LinkNone: - linkType = layers.LinkTypeRaw + linkType = pcap.LinkTypeRaw default: return status.Errorf(codes.InvalidArgument, "unsupported link type %s", linkInfo.TypedSpec().Type) } @@ -2197,27 +2196,25 @@ func (s *Server) PacketCapture(in *machine.PacketCaptureRequest, srv machine.Mac }) } - handle, err := pcapgo.NewEthernetHandle(in.Interface) + handle, err := afpacket.NewTPacket( + afpacket.OptInterface(in.Interface), + afpacket.OptFrameSize(int(in.SnapLen)), + afpacket.OptBlockSize(int(in.SnapLen)*128), + ) if err != nil { - return fmt.Errorf("error setting up packet capture on %q: %w", in.Interface, err) - } - - if err = handle.SetCaptureLength(int(in.SnapLen)); err != nil { - handle.Close() //nolint:errcheck - - return fmt.Errorf("error setting capture length %q: %w", in.SnapLen, err) + return fmt.Errorf("error creating afpacket handle: %w", err) } if len(filter) > 0 { if err = handle.SetBPF(filter); err != nil { - handle.Close() //nolint:errcheck + handle.Close() return fmt.Errorf("error setting BPF filter: %w", err) } } if err = handle.SetPromiscuous(in.Promiscuous); err != nil { - handle.Close() //nolint:errcheck + handle.Close() return fmt.Errorf("error setting promiscuous mode %v: %w", in.Promiscuous, err) } @@ -2246,11 +2243,11 @@ func (s *Server) PacketCapture(in *machine.PacketCaptureRequest, srv machine.Mac } //nolint:gocyclo -func capturePackets(pw *io.PipeWriter, handle *pcapgo.EthernetHandle, snapLen uint32, linkType layers.LinkType) { - defer pw.Close() //nolint:errcheck - defer handle.Close() //nolint:errcheck +func capturePackets(pw *io.PipeWriter, handle *afpacket.TPacket, snapLen uint32, linkType 
pcap.LinkType) { + defer pw.Close() //nolint:errcheck + defer handle.Close() - pcapw := pcapgo.NewWriterNanos(pw) + pcapw := pcap.NewWriter(pw) if err := pcapw.WriteFileHeader(snapLen, linkType); err != nil { pw.CloseWithError(err) @@ -2259,19 +2256,25 @@ func capturePackets(pw *io.PipeWriter, handle *pcapgo.EthernetHandle, snapLen ui } defer func() { - stats, err := handle.Stats() - if err == nil { - log.Printf("pcap: packets captured %d, dropped %d", stats.Packets, stats.Drops) + infoMessage := "pcap: " + + stats, errStats := handle.Stats() + if errStats == nil { + infoMessage += fmt.Sprintf("packets captured %d, polls %d", stats.Packets, stats.Polls) } - }() - pkgsrc := gopacket.NewPacketSource(handle, layers.LayerTypeEthernet) - pkgsrc.Lazy = true + _, socketStatsV3, socketStatsErr := handle.SocketStats() + if socketStatsErr == nil { + infoMessage += fmt.Sprintf(", socket stats: drops %d, packets %d, queue freezes %d", socketStatsV3.Drops(), socketStatsV3.Packets(), socketStatsV3.QueueFreezes()) + } + + log.Print(infoMessage) + }() for { - packet, err := pkgsrc.NextPacket() + data, captureData, err := handle.ZeroCopyReadPacketData() if err == nil { - if err = pcapw.WritePacket(packet.Metadata().CaptureInfo, packet.Data()); err != nil { + if err = pcapw.WritePacket(captureData, data); err != nil { pw.CloseWithError(err) return diff --git a/internal/pkg/pcap/pcap.go b/internal/pkg/pcap/pcap.go new file mode 100644 index 0000000000..3953450a7f --- /dev/null +++ b/internal/pkg/pcap/pcap.go @@ -0,0 +1,109 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// Package pcap implements writing packet data to pcap files. +package pcap + +import ( + "encoding/binary" + "fmt" + "io" + "time" + + "github.com/gopacket/gopacket" +) + +// Writer wraps an underlying io.Writer to write packet data in PCAP +// format. 
See http://wiki.wireshark.org/Development/LibpcapFileFormat +// for information on the file format. +// +// For those that care, we currently write v2.4 files with nanosecond +// timestamp resolution and little-endian encoding. +type Writer struct { + w io.Writer + buf [16]byte +} + +const ( + magicNanoseconds = 0xA1B23C4D + versionMajor = 2 + versionMinor = 4 +) + +// LinkType is the link type for the pcap file. +type LinkType uint32 + +// LinkType values. +const ( + LinkTypeEthernet LinkType = 1 + LinkTypeRaw LinkType = 101 +) + +// NewWriter returns a new writer object. +// +// If this is a new empty writer (as opposed to +// an append), you must call WriteFileHeader before WritePacket. Packet +// timestamps are written with nanosecond precision. +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w} +} + +// WriteFileHeader writes a file header out to the writer. +// This must be called exactly once per output. +func (w *Writer) WriteFileHeader(snaplen uint32, linktype LinkType) error { + var buf [24]byte + + binary.LittleEndian.PutUint32(buf[0:4], magicNanoseconds) + binary.LittleEndian.PutUint16(buf[4:6], versionMajor) + binary.LittleEndian.PutUint16(buf[6:8], versionMinor) + + // bytes 8:12 stay 0 (timezone = UTC) + // bytes 12:16 stay 0 (sigfigs is always set to zero, according to + // http://wiki.wireshark.org/Development/LibpcapFileFormat) + binary.LittleEndian.PutUint32(buf[16:20], snaplen) + binary.LittleEndian.PutUint32(buf[20:24], uint32(linktype)) + + _, err := w.w.Write(buf[:]) + + return err +} + +func (w *Writer) writePacketHeader(ci gopacket.CaptureInfo) error { + t := ci.Timestamp + if t.IsZero() { + t = time.Now() + } + + secs := t.Unix() + binary.LittleEndian.PutUint32(w.buf[0:4], uint32(secs)) + + usecs := t.Nanosecond() + binary.LittleEndian.PutUint32(w.buf[4:8], uint32(usecs)) + + binary.LittleEndian.PutUint32(w.buf[8:12], uint32(ci.CaptureLength)) + binary.LittleEndian.PutUint32(w.buf[12:16], uint32(ci.Length)) + + _, 
err := w.w.Write(w.buf[:]) + + return err +} + +// WritePacket writes the given packet data out to the file. +func (w *Writer) WritePacket(ci gopacket.CaptureInfo, data []byte) error { + if ci.CaptureLength != len(data) { + return fmt.Errorf("capture length %d does not match data length %d", ci.CaptureLength, len(data)) + } + + if ci.CaptureLength > ci.Length { + return fmt.Errorf("invalid capture info %+v: capture length > length", ci) + } + + if err := w.writePacketHeader(ci); err != nil { + return fmt.Errorf("error writing packet header: %v", err) + } + + _, err := w.w.Write(data) + + return err +}