From ed4131d498d8536748a6379198d7cf7b2856abe7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 14:30:25 +0000 Subject: [PATCH] Bump github.com/vishvananda/netlink Bumps [github.com/vishvananda/netlink](https://github.com/vishvananda/netlink) from 0.0.0-20170808154308-f5a6f697a596 to 1.2.1-beta.2. - [Release notes](https://github.com/vishvananda/netlink/releases) - [Commits](https://github.com/vishvananda/netlink/commits/v1.2.1-beta.2) --- updated-dependencies: - dependency-name: github.com/vishvananda/netlink dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- go.mod | 4 +- go.sum | 10 +- .../github.com/vishvananda/netlink/.gitignore | 1 + .../vishvananda/netlink/.travis.yml | 13 - .../vishvananda/netlink/CHANGELOG.md | 5 + .../github.com/vishvananda/netlink/Makefile | 5 +- .../github.com/vishvananda/netlink/README.md | 3 +- vendor/github.com/vishvananda/netlink/addr.go | 1 + .../vishvananda/netlink/addr_linux.go | 236 +- .../vishvananda/netlink/bpf_linux.go | 107 +- .../vishvananda/netlink/bridge_linux.go | 33 +- .../github.com/vishvananda/netlink/class.go | 169 +- .../vishvananda/netlink/class_linux.go | 189 +- .../vishvananda/netlink/conntrack_linux.go | 328 ++- .../vishvananda/netlink/devlink_linux.go | 728 +++++ .../github.com/vishvananda/netlink/filter.go | 219 +- .../vishvananda/netlink/filter_linux.go | 652 ++++- vendor/github.com/vishvananda/netlink/fou.go | 21 + .../vishvananda/netlink/fou_linux.go | 211 ++ .../vishvananda/netlink/fou_unspecified.go | 15 + .../vishvananda/netlink/genetlink_linux.go | 10 +- .../vishvananda/netlink/gtp_linux.go | 15 +- .../vishvananda/netlink/handle_linux.go | 94 +- .../vishvananda/netlink/handle_unspecified.go | 60 +- .../vishvananda/netlink/inet_diag.go | 31 + .../vishvananda/netlink/ioctl_linux.go | 90 + .../vishvananda/netlink/ipset_linux.go | 504 ++++ vendor/github.com/vishvananda/netlink/link.go | 657 ++++- .../vishvananda/netlink/link_linux.go | 2352 ++++++++++++++--- .../github.com/vishvananda/netlink/neigh.go | 11 + .../vishvananda/netlink/neigh_linux.go | 260 +- .../github.com/vishvananda/netlink/netlink.go | 3 +- .../netlink/netlink_unspecified.go | 38 +- .../vishvananda/netlink/netns_linux.go | 141 + .../vishvananda/netlink/netns_unspecified.go | 19 + .../vishvananda/netlink/nl/addr_linux.go | 27 +- .../vishvananda/netlink/nl/bridge_linux.go | 4 +- .../vishvananda/netlink/nl/conntrack_linux.go | 46 +- .../vishvananda/netlink/nl/devlink_linux.go | 96 + .../vishvananda/netlink/nl/ipset_linux.go | 222 ++ .../vishvananda/netlink/nl/link_linux.go | 233 +- .../vishvananda/netlink/nl/lwt_linux.go | 29 + .../vishvananda/netlink/nl/nl_linux.go | 450 +++- .../netlink/nl/parse_attr_linux.go | 79 + .../vishvananda/netlink/nl/rdma_link_linux.go | 39 + .../vishvananda/netlink/nl/route_linux.go | 65 +- .../vishvananda/netlink/nl/seg6_linux.go | 154 ++ .../vishvananda/netlink/nl/seg6local_linux.go | 76 + .../vishvananda/netlink/nl/syscall.go | 30 +- .../vishvananda/netlink/nl/tc_linux.go | 446 +++- .../vishvananda/netlink/nl/xfrm_linux.go | 62 +- .../netlink/nl/xfrm_state_linux.go | 2 +- .../vishvananda/netlink/proc_event_linux.go | 217 ++ .../vishvananda/netlink/protinfo.go | 4 + .../vishvananda/netlink/protinfo_linux.go | 16 +- .../github.com/vishvananda/netlink/qdisc.go | 134 + .../vishvananda/netlink/qdisc_linux.go | 305 ++- .../vishvananda/netlink/rdma_link_linux.go | 331 +++ .../github.com/vishvananda/netlink/route.go | 145 +- .../vishvananda/netlink/route_linux.go | 1093 +++++++- .../vishvananda/netlink/route_unspecified.go | 10 + vendor/github.com/vishvananda/netlink/rule.go | 30 +- .../vishvananda/netlink/rule_linux.go | 134 +- .../vishvananda/netlink/socket_linux.go | 156 +- vendor/github.com/vishvananda/netlink/tcp.go | 84 + .../vishvananda/netlink/tcp_linux.go | 353 +++ vendor/github.com/vishvananda/netlink/xfrm.go | 13 +- .../vishvananda/netlink/xfrm_monitor_linux.go | 13 +- .../vishvananda/netlink/xfrm_policy.go | 39 +- .../vishvananda/netlink/xfrm_policy_linux.go | 44 +- .../vishvananda/netlink/xfrm_state.go | 33 +- .../vishvananda/netlink/xfrm_state_linux.go | 87 +- vendor/github.com/vishvananda/netns/README.md | 1 - vendor/github.com/vishvananda/netns/netns.go | 19 +- .../vishvananda/netns/netns_linux.go | 94 +- vendor/modules.txt | 8 +- 76 files changed, 11146 insertions(+), 1512 deletions(-) create mode 100644 vendor/github.com/vishvananda/netlink/.gitignore delete mode 100644 vendor/github.com/vishvananda/netlink/.travis.yml create mode 100644 vendor/github.com/vishvananda/netlink/CHANGELOG.md create mode 100644 vendor/github.com/vishvananda/netlink/devlink_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/fou.go create mode 100644 vendor/github.com/vishvananda/netlink/fou_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/fou_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/inet_diag.go create mode 100644 vendor/github.com/vishvananda/netlink/ioctl_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/ipset_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/netns_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/netns_unspecified.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/devlink_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/ipset_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/lwt_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/seg6_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/proc_event_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/rdma_link_linux.go create mode 100644 vendor/github.com/vishvananda/netlink/tcp.go create mode 100644 vendor/github.com/vishvananda/netlink/tcp_linux.go diff --git a/go.mod b/go.mod index b1e5b4fff7..c00d6d3e39 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/diskfs/go-diskfs v1.3.0 github.com/packethost/packngo v0.1.0 github.com/sirupsen/logrus v1.9.0 - github.com/vishvananda/netlink v0.0.0-20170808154308-f5a6f697a596 + github.com/vishvananda/netlink v1.2.1-beta.2 github.com/vmware/vmw-guestinfo v0.0.0-20220317130741-510905f0efa3 ) @@ -15,7 +15,7 @@ require ( github.com/pierrec/lz4 v2.6.1+incompatible // indirect github.com/pkg/xattr v0.4.9 // indirect github.com/ulikunitz/xz v0.5.11 // indirect - github.com/vishvananda/netns v0.0.0-20170707011535-86bef332bfc3 // indirect + github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae // indirect golang.org/x/sys v0.8.0 // indirect gopkg.in/djherbis/times.v1 v1.3.0 // indirect ) diff --git a/go.sum b/go.sum index 4524f17e2e..db34454bf5 100644 --- a/go.sum +++ b/go.sum @@ -38,14 +38,16 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8= github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= -github.com/vishvananda/netlink v0.0.0-20170808154308-f5a6f697a596 h1:K6pwCps8j1ylaB37G0r6hGajvbNsdm+0ITJ6L88r65w= -github.com/vishvananda/netlink v0.0.0-20170808154308-f5a6f697a596/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= -github.com/vishvananda/netns v0.0.0-20170707011535-86bef332bfc3 h1:NcYCJC+LbOrfvuf/uHeM/kxh6vOmiuInC4GAWRdc+P0= -github.com/vishvananda/netns v0.0.0-20170707011535-86bef332bfc3/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= +github.com/vishvananda/netlink v1.2.1-beta.2 h1:Llsql0lnQEbHj0I1OuKyp8otXp0r3q0mPkuhwHfStVs= +github.com/vishvananda/netlink v1.2.1-beta.2/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= +github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae h1:4hwBBUfQCFe3Cym0ZtKyq7L16eZUtYKs+BaHDN6mAns= +github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/vmware/vmw-guestinfo v0.0.0-20220317130741-510905f0efa3 h1:v6jG/tdl4O07LNVp74Nt7/OyL+1JsIW1M2f/nSvQheY= github.com/vmware/vmw-guestinfo v0.0.0-20220317130741-510905f0efa3/go.mod h1:CSBTxrhePCm0cmXNKDGeu+6bOQzpaEklfCqEpn89JWk= golang.org/x/sys v0.0.0-20181021155630-eda9bb28ed51/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/vendor/github.com/vishvananda/netlink/.gitignore b/vendor/github.com/vishvananda/netlink/.gitignore new file mode 100644 index 0000000000..9f11b755a1 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/.gitignore @@ -0,0 +1 @@ +.idea/ diff --git a/vendor/github.com/vishvananda/netlink/.travis.yml b/vendor/github.com/vishvananda/netlink/.travis.yml deleted file mode 100644 index f5c0b3eb5f..0000000000 --- a/vendor/github.com/vishvananda/netlink/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -language: go -before_script: - # make sure we keep path in tact when we sudo - - sudo sed -i -e 's/^Defaults\tsecure_path.*$//' /etc/sudoers - # modprobe ip_gre or else the first gre device can't be deleted - - sudo modprobe ip_gre - # modprobe nf_conntrack for the conntrack testing - - sudo modprobe nf_conntrack - - sudo modprobe nf_conntrack_netlink - - sudo modprobe nf_conntrack_ipv4 - - sudo modprobe nf_conntrack_ipv6 -install: - - go get github.com/vishvananda/netns diff --git a/vendor/github.com/vishvananda/netlink/CHANGELOG.md b/vendor/github.com/vishvananda/netlink/CHANGELOG.md new file mode 100644 index 0000000000..b11e59ff6a --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## 1.0.0 (2018-03-15) + +Initial release tagging \ No newline at end of file diff --git a/vendor/github.com/vishvananda/netlink/Makefile b/vendor/github.com/vishvananda/netlink/Makefile index 6c8413b13a..a0e68e7a9a 100644 --- a/vendor/github.com/vishvananda/netlink/Makefile +++ b/vendor/github.com/vishvananda/netlink/Makefile @@ -3,7 +3,8 @@ DIRS := \ nl DEPS = \ - github.com/vishvananda/netns + github.com/vishvananda/netns \ + golang.org/x/sys/unix uniq = $(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1))) testdirs = $(call uniq,$(foreach d,$(1),$(dir $(wildcard $(d)/*_test.go)))) @@ -18,7 +19,7 @@ $(call goroot,$(DEPS)): .PHONY: $(call testdirs,$(DIRS)) $(call testdirs,$(DIRS)): - sudo -E go test -test.parallel 4 -timeout 60s -v github.com/vishvananda/netlink/$@ + go test -test.exec sudo -test.parallel 4 -timeout 60s -test.v github.com/vishvananda/netlink/$@ $(call fmt,$(call testdirs,$(DIRS))): ! gofmt -l $(subst fmt-,,$@)/*.go | grep -q . diff --git a/vendor/github.com/vishvananda/netlink/README.md b/vendor/github.com/vishvananda/netlink/README.md index 0b61be217e..0128bc67d5 100644 --- a/vendor/github.com/vishvananda/netlink/README.md +++ b/vendor/github.com/vishvananda/netlink/README.md @@ -1,6 +1,6 @@ # netlink - netlink library for go # -[![Build Status](https://travis-ci.org/vishvananda/netlink.png?branch=master)](https://travis-ci.org/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) +![Build Status](https://github.com/vishvananda/netlink/actions/workflows/main.yml/badge.svg) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) The netlink package provides a simple netlink library for go. Netlink is the interface a user-space program in linux uses to communicate with @@ -89,3 +89,4 @@ There are also a few pieces of low level netlink functionality that still need to be implemented. Routing rules are not in place and some of the more advanced link types. Hopefully there is decent structure and testing in place to make these fairly straightforward to add. + diff --git a/vendor/github.com/vishvananda/netlink/addr.go b/vendor/github.com/vishvananda/netlink/addr.go index f08c956969..653f540dbc 100644 --- a/vendor/github.com/vishvananda/netlink/addr.go +++ b/vendor/github.com/vishvananda/netlink/addr.go @@ -17,6 +17,7 @@ type Addr struct { Broadcast net.IP PreferedLft int ValidLft int + LinkIndex int } // String returns $ip/$netmask $label diff --git a/vendor/github.com/vishvananda/netlink/addr_linux.go b/vendor/github.com/vishvananda/netlink/addr_linux.go index 92edb90d09..72862ce1f4 100644 --- a/vendor/github.com/vishvananda/netlink/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -2,54 +2,74 @@ package netlink import ( "fmt" - "log" "net" "strings" "syscall" "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" + "golang.org/x/sys/unix" ) -// IFA_FLAGS is a u32 attribute. -const IFA_FLAGS = 0x8 - // AddrAdd will add an IP address to a link device. +// // Equivalent to: `ip addr add $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. func AddrAdd(link Link, addr *Addr) error { return pkgHandle.AddrAdd(link, addr) } // AddrAdd will add an IP address to a link device. +// // Equivalent to: `ip addr add $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. func (h *Handle) AddrAdd(link Link, addr *Addr) error { - req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) return h.addrHandle(link, addr, req) } // AddrReplace will replace (or, if not present, add) an IP address on a link device. +// // Equivalent to: `ip addr replace $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. func AddrReplace(link Link, addr *Addr) error { return pkgHandle.AddrReplace(link, addr) } // AddrReplace will replace (or, if not present, add) an IP address on a link device. +// // Equivalent to: `ip addr replace $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. func (h *Handle) AddrReplace(link Link, addr *Addr) error { - req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_REPLACE|unix.NLM_F_ACK) return h.addrHandle(link, addr, req) } // AddrDel will delete an IP address from a link device. +// // Equivalent to: `ip addr del $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. func AddrDel(link Link, addr *Addr) error { return pkgHandle.AddrDel(link, addr) } // AddrDel will delete an IP address from a link device. // Equivalent to: `ip addr del $addr dev $link` +// +// If `addr` is an IPv4 address and the broadcast address is not given, it +// will be automatically computed based on the IP mask if /30 or larger. func (h *Handle) AddrDel(link Link, addr *Addr) error { - req := h.newNetlinkRequest(syscall.RTM_DELADDR, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_DELADDR, unix.NLM_F_ACK) return h.addrHandle(link, addr, req) } @@ -65,7 +85,11 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error msg := nl.NewIfAddrmsg(family) msg.Index = uint32(base.Index) msg.Scope = uint8(addr.Scope) - prefixlen, masklen := addr.Mask.Size() + mask := addr.Mask + if addr.Peer != nil { + mask = addr.Peer.Mask + } + prefixlen, masklen := mask.Size() msg.Prefixlen = uint8(prefixlen) req.AddData(msg) @@ -76,7 +100,7 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error localAddrData = addr.IP.To16() } - localData := nl.NewRtAttr(syscall.IFA_LOCAL, localAddrData) + localData := nl.NewRtAttr(unix.IFA_LOCAL, localAddrData) req.AddData(localData) var peerAddrData []byte if addr.Peer != nil { @@ -89,7 +113,7 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error peerAddrData = localAddrData } - addressData := nl.NewRtAttr(syscall.IFA_ADDRESS, peerAddrData) + addressData := nl.NewRtAttr(unix.IFA_ADDRESS, peerAddrData) req.AddData(addressData) if addr.Flags != 0 { @@ -98,26 +122,45 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error } else { b := make([]byte, 4) native.PutUint32(b, uint32(addr.Flags)) - flagsData := nl.NewRtAttr(IFA_FLAGS, b) + flagsData := nl.NewRtAttr(unix.IFA_FLAGS, b) req.AddData(flagsData) } } - if addr.Broadcast == nil { - calcBroadcast := make(net.IP, masklen/8) - for i := range localAddrData { - calcBroadcast[i] = localAddrData[i] | ^addr.Mask[i] + if family == FAMILY_V4 { + // Automatically set the broadcast address if it is unset and the + // subnet is large enough to sensibly have one (/30 or larger). + // See: RFC 3021 + if addr.Broadcast == nil && prefixlen < 31 { + calcBroadcast := make(net.IP, masklen/8) + for i := range localAddrData { + calcBroadcast[i] = localAddrData[i] | ^mask[i] + } + addr.Broadcast = calcBroadcast + } + + if addr.Broadcast != nil { + req.AddData(nl.NewRtAttr(unix.IFA_BROADCAST, addr.Broadcast)) + } + + if addr.Label != "" { + labelData := nl.NewRtAttr(unix.IFA_LABEL, nl.ZeroTerminated(addr.Label)) + req.AddData(labelData) } - addr.Broadcast = calcBroadcast } - req.AddData(nl.NewRtAttr(syscall.IFA_BROADCAST, addr.Broadcast)) - if addr.Label != "" { - labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label)) - req.AddData(labelData) + // 0 is the default value for these attributes. However, 0 means "expired", while the least-surprising default + // value should be "forever". To compensate for that, only add the attributes if at least one of the values is + // non-zero, which means the caller has explicitly set them + if addr.ValidLft > 0 || addr.PreferedLft > 0 { + cachedata := nl.IfaCacheInfo{unix.IfaCacheinfo{ + Valid: uint32(addr.ValidLft), + Prefered: uint32(addr.PreferedLft), + }} + req.AddData(nl.NewRtAttr(unix.IFA_CACHEINFO, cachedata.Serialize())) } - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -132,11 +175,11 @@ func AddrList(link Link, family int) ([]Addr, error) { // Equivalent to: `ip addr show`. // The list can be filtered by link and ip family. func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { - req := h.newNetlinkRequest(syscall.RTM_GETADDR, syscall.NLM_F_DUMP) - msg := nl.NewIfInfomsg(family) + req := h.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP) + msg := nl.NewIfAddrmsg(family) req.AddData(msg) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWADDR) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR) if err != nil { return nil, err } @@ -150,12 +193,12 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { var res []Addr for _, m := range msgs { - addr, msgFamily, ifindex, err := parseAddr(m) + addr, msgFamily, err := parseAddr(m) if err != nil { return res, err } - if link != nil && ifindex != indexFilter { + if link != nil && addr.LinkIndex != indexFilter { // Ignore messages from other interfaces continue } @@ -170,11 +213,11 @@ func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { return res, nil } -func parseAddr(m []byte) (addr Addr, family, index int, err error) { +func parseAddr(m []byte) (addr Addr, family int, err error) { msg := nl.DeserializeIfAddrmsg(m) family = -1 - index = -1 + addr.LinkIndex = -1 attrs, err1 := nl.ParseRouteAttr(m[msg.Len():]) if err1 != nil { @@ -183,42 +226,58 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) { } family = int(msg.Family) - index = int(msg.Index) + addr.LinkIndex = int(msg.Index) var local, dst *net.IPNet for _, attr := range attrs { switch attr.Attr.Type { - case syscall.IFA_ADDRESS: + case unix.IFA_ADDRESS: dst = &net.IPNet{ IP: attr.Value, Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)), } - addr.Peer = dst - case syscall.IFA_LOCAL: + case unix.IFA_LOCAL: + // iproute2 manual: + // If a peer address is specified, the local address + // cannot have a prefix length. The network prefix is + // associated with the peer rather than with the local + // address. + n := 8 * len(attr.Value) local = &net.IPNet{ IP: attr.Value, - Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)), + Mask: net.CIDRMask(n, n), } - addr.IPNet = local - case syscall.IFA_BROADCAST: + case unix.IFA_BROADCAST: addr.Broadcast = attr.Value - case syscall.IFA_LABEL: + case unix.IFA_LABEL: addr.Label = string(attr.Value[:len(attr.Value)-1]) - case IFA_FLAGS: + case unix.IFA_FLAGS: addr.Flags = int(native.Uint32(attr.Value[0:4])) - case nl.IFA_CACHEINFO: + case unix.IFA_CACHEINFO: ci := nl.DeserializeIfaCacheInfo(attr.Value) - addr.PreferedLft = int(ci.IfaPrefered) - addr.ValidLft = int(ci.IfaValid) + addr.PreferedLft = int(ci.Prefered) + addr.ValidLft = int(ci.Valid) } } - // IFA_LOCAL should be there but if not, fall back to IFA_ADDRESS + // libnl addr.c comment: + // IPv6 sends the local address as IFA_ADDRESS with no + // IFA_LOCAL, IPv4 sends both IFA_LOCAL and IFA_ADDRESS + // with IFA_ADDRESS being the peer address if they differ + // + // But obviously, as there are IPv6 PtP addresses, too, + // IFA_LOCAL should also be handled for IPv6. if local != nil { - addr.IPNet = local + if family == FAMILY_V4 && dst != nil && local.IP.Equal(dst.IP) { + addr.IPNet = dst + } else { + addr.IPNet = local + addr.Peer = dst + } } else { addr.IPNet = dst } + addr.Scope = int(msg.Scope) return @@ -237,50 +296,119 @@ type AddrUpdate struct { // AddrSubscribe takes a chan down which notifications will be sent // when addresses change. Close the 'done' chan to stop subscription. func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribe(netns.None(), netns.None(), ch, done) + return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil) } // AddrSubscribeAt works like AddrSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribe(ns, netns.None(), ch, done) + return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil) +} + +// AddrSubscribeOptions contains a set of options to use with +// AddrSubscribeWithOptions. +type AddrSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool + ReceiveBufferSize int + ReceiveTimeout *unix.Timeval } -func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { - s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR) +// AddrSubscribeWithOptions work like AddrSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, options AddrSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize, options.ReceiveTimeout) +} + +func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int, rcvTimeout *unix.Timeval) error { + s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR) if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if done != nil { go func() { <-done s.Close() }() } + if rcvbuf != 0 { + err = pkgHandle.SetSocketReceiveBufferSize(rcvbuf, false) + if err != nil { + return err + } + } + if listExisting { + req := pkgHandle.newNetlinkRequest(unix.RTM_GETADDR, + unix.NLM_F_DUMP) + infmsg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(infmsg) + if err := s.Send(req); err != nil { + return err + } + } go func() { defer close(ch) for { - msgs, err := s.Receive() + msgs, from, err := s.Receive() if err != nil { - log.Printf("netlink.AddrSubscribe: Receive() error: %v", err) + if cberr != nil { + cberr(fmt.Errorf("Receive failed: %v", + err)) + } return } + if from.Pid != nl.PidKernel { + if cberr != nil { + cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)) + } + continue + } for _, m := range msgs { + if m.Header.Type == unix.NLMSG_DONE { + continue + } + if m.Header.Type == unix.NLMSG_ERROR { + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + continue + } + if cberr != nil { + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) + } + continue + } msgType := m.Header.Type - if msgType != syscall.RTM_NEWADDR && msgType != syscall.RTM_DELADDR { - log.Printf("netlink.AddrSubscribe: bad message type: %d", msgType) + if msgType != unix.RTM_NEWADDR && msgType != unix.RTM_DELADDR { + if cberr != nil { + cberr(fmt.Errorf("bad message type: %d", msgType)) + } continue } - addr, _, ifindex, err := parseAddr(m.Data) + addr, _, err := parseAddr(m.Data) if err != nil { - log.Printf("netlink.AddrSubscribe: could not parse address: %v", err) + if cberr != nil { + cberr(fmt.Errorf("could not parse address: %v", err)) + } continue } ch <- AddrUpdate{LinkAddress: *addr.IPNet, - LinkIndex: ifindex, - NewAddr: msgType == syscall.RTM_NEWADDR, + LinkIndex: addr.LinkIndex, + NewAddr: msgType == unix.RTM_NEWADDR, Flags: addr.Flags, Scope: addr.Scope, PreferedLft: addr.PreferedLft, diff --git a/vendor/github.com/vishvananda/netlink/bpf_linux.go b/vendor/github.com/vishvananda/netlink/bpf_linux.go index 533743987a..96befbfe0c 100644 --- a/vendor/github.com/vishvananda/netlink/bpf_linux.go +++ b/vendor/github.com/vishvananda/netlink/bpf_linux.go @@ -1,49 +1,12 @@ package netlink -/* -#include -#include -#include -#include -#include -#include +import ( + "unsafe" -static int load_simple_bpf(int prog_type, int ret) { -#ifdef __NR_bpf - // { return ret; } - __u64 __attribute__((aligned(8))) insns[] = { - 0x00000000000000b7ull | ((__u64)ret<<32), - 0x0000000000000095ull, - }; - __u8 __attribute__((aligned(8))) license[] = "ASL2"; - // Copied from a header file since libc is notoriously slow to update. - // The call will succeed or fail and that will be our indication on - // whether or not it is supported. - struct { - __u32 prog_type; - __u32 insn_cnt; - __u64 insns; - __u64 license; - __u32 log_level; - __u32 log_size; - __u64 log_buf; - __u32 kern_version; - } __attribute__((aligned(8))) attr = { - .prog_type = prog_type, - .insn_cnt = 2, - .insns = (uintptr_t)&insns, - .license = (uintptr_t)&license, - }; - return syscall(__NR_bpf, 5, &attr, sizeof(attr)); -#else - errno = EINVAL; - return -1; -#endif -} -*/ -import "C" + "golang.org/x/sys/unix" +) -type BpfProgType C.int +type BpfProgType uint32 const ( BPF_PROG_TYPE_UNSPEC BpfProgType = iota @@ -53,10 +16,62 @@ const ( BPF_PROG_TYPE_SCHED_ACT BPF_PROG_TYPE_TRACEPOINT BPF_PROG_TYPE_XDP + BPF_PROG_TYPE_PERF_EVENT + BPF_PROG_TYPE_CGROUP_SKB + BPF_PROG_TYPE_CGROUP_SOCK + BPF_PROG_TYPE_LWT_IN + BPF_PROG_TYPE_LWT_OUT + BPF_PROG_TYPE_LWT_XMIT + BPF_PROG_TYPE_SOCK_OPS + BPF_PROG_TYPE_SK_SKB + BPF_PROG_TYPE_CGROUP_DEVICE + BPF_PROG_TYPE_SK_MSG + BPF_PROG_TYPE_RAW_TRACEPOINT + BPF_PROG_TYPE_CGROUP_SOCK_ADDR + BPF_PROG_TYPE_LWT_SEG6LOCAL + BPF_PROG_TYPE_LIRC_MODE2 + BPF_PROG_TYPE_SK_REUSEPORT + BPF_PROG_TYPE_FLOW_DISSECTOR + BPF_PROG_TYPE_CGROUP_SYSCTL + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE + BPF_PROG_TYPE_CGROUP_SOCKOPT + BPF_PROG_TYPE_TRACING + BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_EXT + BPF_PROG_TYPE_LSM + BPF_PROG_TYPE_SK_LOOKUP ) -// loadSimpleBpf loads a trivial bpf program for testing purposes -func loadSimpleBpf(progType BpfProgType, ret int) (int, error) { - fd, err := C.load_simple_bpf(C.int(progType), C.int(ret)) - return int(fd), err +type BPFAttr struct { + ProgType uint32 + InsnCnt uint32 + Insns uintptr + License uintptr + LogLevel uint32 + LogSize uint32 + LogBuf uintptr + KernVersion uint32 +} + +// loadSimpleBpf loads a trivial bpf program for testing purposes. +func loadSimpleBpf(progType BpfProgType, ret uint32) (int, error) { + insns := []uint64{ + 0x00000000000000b7 | (uint64(ret) << 32), + 0x0000000000000095, + } + license := []byte{'A', 'S', 'L', '2', '\x00'} + attr := BPFAttr{ + ProgType: uint32(progType), + InsnCnt: uint32(len(insns)), + Insns: uintptr(unsafe.Pointer(&insns[0])), + License: uintptr(unsafe.Pointer(&license[0])), + } + fd, _, errno := unix.Syscall(unix.SYS_BPF, + 5, /* bpf cmd */ + uintptr(unsafe.Pointer(&attr)), + unsafe.Sizeof(attr)) + if errno != 0 { + return 0, errno + } + return int(fd), nil } diff --git a/vendor/github.com/vishvananda/netlink/bridge_linux.go b/vendor/github.com/vishvananda/netlink/bridge_linux.go index a65d6a1319..6e1224c47b 100644 --- a/vendor/github.com/vishvananda/netlink/bridge_linux.go +++ b/vendor/github.com/vishvananda/netlink/bridge_linux.go @@ -2,9 +2,9 @@ package netlink import ( "fmt" - "syscall" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) // BridgeVlanList gets a map of device id to bridge vlan infos. @@ -16,12 +16,12 @@ func BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) { // BridgeVlanList gets a map of device id to bridge vlan infos. // Equivalent to: `bridge vlan show` func (h *Handle) BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) { - req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP) - msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP) + msg := nl.NewIfInfomsg(unix.AF_BRIDGE) req.AddData(msg) - req.AddData(nl.NewRtAttr(nl.IFLA_EXT_MASK, nl.Uint32Attr(uint32(nl.RTEXT_FILTER_BRVLAN)))) + req.AddData(nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(uint32(nl.RTEXT_FILTER_BRVLAN)))) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWLINK) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK) if err != nil { return nil, err } @@ -35,7 +35,7 @@ func (h *Handle) BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) { } for _, attr := range attrs { switch attr.Attr.Type { - case nl.IFLA_AF_SPEC: + case unix.IFLA_AF_SPEC: //nested attr nestAttrs, err := nl.ParseRouteAttr(attr.Value) if err != nil { @@ -63,7 +63,7 @@ func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) err // BridgeVlanAdd adds a new vlan filter entry // Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error { - return h.bridgeVlanModify(syscall.RTM_SETLINK, link, vid, pvid, untagged, self, master) + return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, pvid, untagged, self, master) } // BridgeVlanDel adds a new vlan filter entry @@ -75,19 +75,19 @@ func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) err // BridgeVlanDel adds a new vlan filter entry // Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]` func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error { - return h.bridgeVlanModify(syscall.RTM_DELLINK, link, vid, pvid, untagged, self, master) + return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, pvid, untagged, self, master) } func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(cmd, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(cmd, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) + msg := nl.NewIfInfomsg(unix.AF_BRIDGE) msg.Index = int32(base.Index) req.AddData(msg) - br := nl.NewRtAttr(nl.IFLA_AF_SPEC, nil) + br := nl.NewRtAttr(unix.IFLA_AF_SPEC, nil) var flags uint16 if self { flags |= nl.BRIDGE_FLAGS_SELF @@ -96,7 +96,7 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged flags |= nl.BRIDGE_FLAGS_MASTER } if flags > 0 { - nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_FLAGS, nl.Uint16Attr(flags)) + br.AddRtAttr(nl.IFLA_BRIDGE_FLAGS, nl.Uint16Attr(flags)) } vlanInfo := &nl.BridgeVlanInfo{Vid: vid} if pvid { @@ -105,11 +105,8 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged if untagged { vlanInfo.Flags |= nl.BRIDGE_VLAN_INFO_UNTAGGED } - nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) + br.AddRtAttr(nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize()) req.AddData(br) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) - if err != nil { - return err - } - return nil + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err } diff --git a/vendor/github.com/vishvananda/netlink/class.go b/vendor/github.com/vishvananda/netlink/class.go index 8ee13af48e..10ceffed8b 100644 --- a/vendor/github.com/vishvananda/netlink/class.go +++ b/vendor/github.com/vishvananda/netlink/class.go @@ -4,25 +4,76 @@ import ( "fmt" ) +// Class interfaces for all classes type Class interface { Attrs() *ClassAttrs Type() string } +// Generic networking statistics for netlink users. +// This file contains "gnet_" prefixed structs and relevant functions. +// See Documentation/networking/getn_stats.txt in Linux source code for more details. + +// GnetStatsBasic Ref: struct gnet_stats_basic { ... } +type GnetStatsBasic struct { + Bytes uint64 // number of seen bytes + Packets uint32 // number of seen packets +} + +// GnetStatsRateEst Ref: struct gnet_stats_rate_est { ... } +type GnetStatsRateEst struct { + Bps uint32 // current byte rate + Pps uint32 // current packet rate +} + +// GnetStatsRateEst64 Ref: struct gnet_stats_rate_est64 { ... } +type GnetStatsRateEst64 struct { + Bps uint64 // current byte rate + Pps uint64 // current packet rate +} + +// GnetStatsQueue Ref: struct gnet_stats_queue { ... } +type GnetStatsQueue struct { + Qlen uint32 // queue length + Backlog uint32 // backlog size of queue + Drops uint32 // number of dropped packets + Requeues uint32 // number of requues + Overlimits uint32 // number of enqueues over the limit +} + +// ClassStatistics representation based on generic networking statistics for netlink. +// See Documentation/networking/gen_stats.txt in Linux source code for more details. +type ClassStatistics struct { + Basic *GnetStatsBasic + Queue *GnetStatsQueue + RateEst *GnetStatsRateEst +} + +// NewClassStatistics Construct a ClassStatistics struct which fields are all initialized by 0. +func NewClassStatistics() *ClassStatistics { + return &ClassStatistics{ + Basic: &GnetStatsBasic{}, + Queue: &GnetStatsQueue{}, + RateEst: &GnetStatsRateEst{}, + } +} + // ClassAttrs represents a netlink class. A filter is associated with a link, // has a handle and a parent. The root filter of a device should have a // parent == HANDLE_ROOT. type ClassAttrs struct { - LinkIndex int - Handle uint32 - Parent uint32 - Leaf uint32 + LinkIndex int + Handle uint32 + Parent uint32 + Leaf uint32 + Statistics *ClassStatistics } func (q ClassAttrs) String() string { return fmt.Sprintf("{LinkIndex: %d, Handle: %s, Parent: %s, Leaf: %d}", q.LinkIndex, HandleStr(q.Handle), HandleStr(q.Parent), q.Leaf) } +// HtbClassAttrs stores the attributes of HTB class type HtbClassAttrs struct { // TODO handle all attributes Rate uint64 @@ -54,10 +105,12 @@ func (q HtbClass) String() string { return fmt.Sprintf("{Rate: %d, Ceil: %d, Buffer: %d, Cbuffer: %d}", q.Rate, q.Ceil, q.Buffer, q.Cbuffer) } +// Attrs returns the class attributes func (q *HtbClass) Attrs() *ClassAttrs { return &q.ClassAttrs } +// Type return the class type func (q *HtbClass) Type() string { return "htb" } @@ -69,10 +122,118 @@ type GenericClass struct { ClassType string } +// Attrs return the class attributes func (class *GenericClass) Attrs() *ClassAttrs { return &class.ClassAttrs } +// Type return the class type func (class *GenericClass) Type() string { return class.ClassType } + +// ServiceCurve is a nondecreasing function of some time unit, returning the amount of service +// (an allowed or allocated amount of bandwidth) at some specific point in time. The purpose of it +// should be subconsciously obvious: if a class was allowed to transfer not less than the amount +// specified by its service curve, then the service curve is not violated. +type ServiceCurve struct { + m1 uint32 + d uint32 + m2 uint32 +} + +// Attrs return the parameters of the service curve +func (c *ServiceCurve) Attrs() (uint32, uint32, uint32) { + return c.m1, c.d, c.m2 +} + +// Burst returns the burst rate (m1) of the curve +func (c *ServiceCurve) Burst() uint32 { + return c.m1 +} + +// Delay return the delay (d) of the curve +func (c *ServiceCurve) Delay() uint32 { + return c.d +} + +// Rate returns the rate (m2) of the curve +func (c *ServiceCurve) Rate() uint32 { + return c.m2 +} + +// HfscClass is a representation of the HFSC class +type HfscClass struct { + ClassAttrs + Rsc ServiceCurve + Fsc ServiceCurve + Usc ServiceCurve +} + +// SetUsc sets the USC curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. +func (hfsc *HfscClass) SetUsc(m1 uint32, d uint32, m2 uint32) { + hfsc.Usc = ServiceCurve{m1: m1, d: d, m2: m2} +} + +// SetFsc sets the Fsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. +func (hfsc *HfscClass) SetFsc(m1 uint32, d uint32, m2 uint32) { + hfsc.Fsc = ServiceCurve{m1: m1, d: d, m2: m2} +} + +// SetRsc sets the Rsc curve. The bandwidth (m1 and m2) is specified in bits and the delay in +// seconds. +func (hfsc *HfscClass) SetRsc(m1 uint32, d uint32, m2 uint32) { + hfsc.Rsc = ServiceCurve{m1: m1, d: d, m2: m2} +} + +// SetSC implements the SC from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. +func (hfsc *HfscClass) SetSC(m1 uint32, d uint32, m2 uint32) { + hfsc.SetRsc(m1, d, m2) + hfsc.SetFsc(m1, d, m2) +} + +// SetUL implements the UL from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. +func (hfsc *HfscClass) SetUL(m1 uint32, d uint32, m2 uint32) { + hfsc.SetUsc(m1, d, m2) +} + +// SetLS implements the LS from the `tc` CLI. This function behaves the same as if one would set the +// USC through the `tc` command-line tool. This means bandwidth (m1 and m2) is specified in bits and +// the delay in ms. +func (hfsc *HfscClass) SetLS(m1 uint32, d uint32, m2 uint32) { + hfsc.SetFsc(m1, d, m2) +} + +// NewHfscClass returns a new HFSC struct with the set parameters +func NewHfscClass(attrs ClassAttrs) *HfscClass { + return &HfscClass{ + ClassAttrs: attrs, + Rsc: ServiceCurve{}, + Fsc: ServiceCurve{}, + Usc: ServiceCurve{}, + } +} + +// String() returns a string that contains the information and attributes of the HFSC class +func (hfsc *HfscClass) String() string { + return fmt.Sprintf( + "{%s -- {RSC: {m1=%d d=%d m2=%d}} {FSC: {m1=%d d=%d m2=%d}} {USC: {m1=%d d=%d m2=%d}}}", + hfsc.Attrs(), hfsc.Rsc.m1*8, hfsc.Rsc.d, hfsc.Rsc.m2*8, hfsc.Fsc.m1*8, hfsc.Fsc.d, hfsc.Fsc.m2*8, hfsc.Usc.m1*8, hfsc.Usc.d, hfsc.Usc.m2*8, + ) +} + +// Attrs return the Hfsc parameters +func (hfsc *HfscClass) Attrs() *ClassAttrs { + return &hfsc.ClassAttrs +} + +// Type return the type of the class +func (hfsc *HfscClass) Type() string { + return "hfsc" +} diff --git a/vendor/github.com/vishvananda/netlink/class_linux.go b/vendor/github.com/vishvananda/netlink/class_linux.go index 91cd3883de..6f542ba4e7 100644 --- a/vendor/github.com/vishvananda/netlink/class_linux.go +++ b/vendor/github.com/vishvananda/netlink/class_linux.go @@ -1,13 +1,34 @@ package netlink import ( + "bytes" + "encoding/binary" + "encoding/hex" "errors" + "fmt" "syscall" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) -// NOTE: function is in here because it uses other linux functions +// Internal tc_stats representation in Go struct. +// This is for internal uses only to deserialize the payload of rtattr. +// After the deserialization, this should be converted into the canonical stats +// struct, ClassStatistics, in case of statistics of a class. +// Ref: struct tc_stats { ... } +type tcStats struct { + Bytes uint64 // Number of enqueued bytes + Packets uint32 // Number of enqueued packets + Drops uint32 // Packets dropped because of lack of resources + Overlimits uint32 // Number of throttle events when this flow goes out of allocated bandwidth + Bps uint32 // Current flow byte rate + Pps uint32 // Current flow packet rate + Qlen uint32 + Backlog uint32 +} + +// NewHtbClass NOTE: function is in here because it uses other linux functions func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { mtu := 1600 rate := cattrs.Rate / 8 @@ -22,12 +43,12 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { if buffer == 0 { buffer = uint32(float64(rate)/Hz() + float64(mtu)) } - buffer = uint32(Xmittime(rate, buffer)) + buffer = Xmittime(rate, buffer) if cbuffer == 0 { cbuffer = uint32(float64(ceil)/Hz() + float64(mtu)) } - cbuffer = uint32(Xmittime(ceil, cbuffer)) + cbuffer = Xmittime(ceil, cbuffer) return &HtbClass{ ClassAttrs: attrs, @@ -35,9 +56,9 @@ func NewHtbClass(attrs ClassAttrs, cattrs HtbClassAttrs) *HtbClass { Ceil: ceil, Buffer: buffer, Cbuffer: cbuffer, - Quantum: 10, Level: 0, - Prio: 0, + Prio: cattrs.Prio, + Quantum: cattrs.Quantum, } } @@ -50,7 +71,7 @@ func ClassDel(class Class) error { // ClassDel will delete a class from the system. // Equivalent to: `tc class del $class` func (h *Handle) ClassDel(class Class) error { - return h.classModify(syscall.RTM_DELTCLASS, 0, class) + return h.classModify(unix.RTM_DELTCLASS, 0, class) } // ClassChange will change a class in place @@ -64,7 +85,7 @@ func ClassChange(class Class) error { // Equivalent to: `tc class change $class` // The parent and handle MUST NOT be changed. func (h *Handle) ClassChange(class Class) error { - return h.classModify(syscall.RTM_NEWTCLASS, 0, class) + return h.classModify(unix.RTM_NEWTCLASS, 0, class) } // ClassReplace will replace a class to the system. @@ -82,7 +103,7 @@ func ClassReplace(class Class) error { // If a class already exist with this parent/handle pair, the class is changed. // If a class does not already exist with this parent/handle, a new class is created. func (h *Handle) ClassReplace(class Class) error { - return h.classModify(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE, class) + return h.classModify(unix.RTM_NEWTCLASS, unix.NLM_F_CREATE, class) } // ClassAdd will add a class to the system. @@ -95,14 +116,14 @@ func ClassAdd(class Class) error { // Equivalent to: `tc class add $class` func (h *Handle) ClassAdd(class Class) error { return h.classModify( - syscall.RTM_NEWTCLASS, - syscall.NLM_F_CREATE|syscall.NLM_F_EXCL, + unix.RTM_NEWTCLASS, + unix.NLM_F_CREATE|unix.NLM_F_EXCL, class, ) } func (h *Handle) classModify(cmd, flags int, class Class) error { - req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK) base := class.Attrs() msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, @@ -112,12 +133,12 @@ func (h *Handle) classModify(cmd, flags int, class Class) error { } req.AddData(msg) - if cmd != syscall.RTM_DELTCLASS { + if cmd != unix.RTM_DELTCLASS { if err := classPayload(req, class); err != nil { return err } } - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -125,7 +146,9 @@ func classPayload(req *nl.NetlinkRequest, class Class) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(class.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) - if htb, ok := class.(*HtbClass); ok { + switch class.Type() { + case "htb": + htb := class.(*HtbClass) opt := nl.TcHtbCopt{} opt.Buffer = htb.Buffer opt.Cbuffer = htb.Cbuffer @@ -141,18 +164,36 @@ func classPayload(req *nl.NetlinkRequest, class Class) error { var rtab [256]uint32 var ctab [256]uint32 tcrate := nl.TcRateSpec{Rate: uint32(htb.Rate)} - if CalcRtable(&tcrate, rtab, cellLog, uint32(mtu), linklayer) < 0 { + if CalcRtable(&tcrate, rtab[:], cellLog, uint32(mtu), linklayer) < 0 { return errors.New("HTB: failed to calculate rate table") } opt.Rate = tcrate tcceil := nl.TcRateSpec{Rate: uint32(htb.Ceil)} - if CalcRtable(&tcceil, ctab, ccellLog, uint32(mtu), linklayer) < 0 { + if CalcRtable(&tcceil, ctab[:], ccellLog, uint32(mtu), linklayer) < 0 { return errors.New("HTB: failed to calculate ceil rate table") } opt.Ceil = tcceil - nl.NewRtAttrChild(options, nl.TCA_HTB_PARMS, opt.Serialize()) - nl.NewRtAttrChild(options, nl.TCA_HTB_RTAB, SerializeRtab(rtab)) - nl.NewRtAttrChild(options, nl.TCA_HTB_CTAB, SerializeRtab(ctab)) + options.AddRtAttr(nl.TCA_HTB_PARMS, opt.Serialize()) + options.AddRtAttr(nl.TCA_HTB_RTAB, SerializeRtab(rtab)) + options.AddRtAttr(nl.TCA_HTB_CTAB, SerializeRtab(ctab)) + if htb.Rate >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_HTB_RATE64, nl.Uint64Attr(htb.Rate)) + } + if htb.Ceil >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_HTB_CEIL64, nl.Uint64Attr(htb.Ceil)) + } + case "hfsc": + hfsc := class.(*HfscClass) + opt := nl.HfscCopt{} + rm1, rd, rm2 := hfsc.Rsc.Attrs() + opt.Rsc.Set(rm1/8, rd, rm2/8) + fm1, fd, fm2 := hfsc.Fsc.Attrs() + opt.Fsc.Set(fm1/8, fd, fm2/8) + um1, ud, um2 := hfsc.Usc.Attrs() + opt.Usc.Set(um1/8, ud, um2/8) + options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc)) + options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc)) + options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc)) } req.AddData(options) return nil @@ -169,7 +210,7 @@ func ClassList(link Link, parent uint32) ([]Class, error) { // Equivalent to: `tc class show`. // Generally returns nothing if link and parent are not specified. func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) { - req := h.newNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP) + req := h.newNetlinkRequest(unix.RTM_GETTCLASS, unix.NLM_F_DUMP) msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, Parent: parent, @@ -181,7 +222,7 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) { } req.AddData(msg) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTCLASS) if err != nil { return nil, err } @@ -196,9 +237,10 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) { } base := ClassAttrs{ - LinkIndex: int(msg.Ifindex), - Handle: msg.Handle, - Parent: msg.Parent, + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + Statistics: nil, } var class Class @@ -210,6 +252,8 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) { switch classType { case "htb": class = &HtbClass{} + case "hfsc": + class = &HfscClass{} default: class = &GenericClass{ClassType: classType} } @@ -224,6 +268,26 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) { if err != nil { return nil, err } + case "hfsc": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + _, err = parseHfscClassData(class, data) + if err != nil { + return nil, err + } + } + // For backward compatibility. + case nl.TCA_STATS: + base.Statistics, err = parseTcStats(attr.Value) + if err != nil { + return nil, err + } + case nl.TCA_STATS2: + base.Statistics, err = parseTcStats2(attr.Value) + if err != nil { + return nil, err } } } @@ -248,7 +312,84 @@ func parseHtbClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, erro htb.Quantum = opt.Quantum htb.Level = opt.Level htb.Prio = opt.Prio + case nl.TCA_HTB_RATE64: + htb.Rate = native.Uint64(datum.Value[0:8]) + case nl.TCA_HTB_CEIL64: + htb.Ceil = native.Uint64(datum.Value[0:8]) + } + } + return detailed, nil +} + +func parseHfscClassData(class Class, data []syscall.NetlinkRouteAttr) (bool, error) { + hfsc := class.(*HfscClass) + detailed := false + for _, datum := range data { + m1, d, m2 := nl.DeserializeHfscCurve(datum.Value).Attrs() + switch datum.Attr.Type { + case nl.TCA_HFSC_RSC: + hfsc.Rsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} + case nl.TCA_HFSC_FSC: + hfsc.Fsc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} + case nl.TCA_HFSC_USC: + hfsc.Usc = ServiceCurve{m1: m1 * 8, d: d, m2: m2 * 8} } } return detailed, nil } + +func parseTcStats(data []byte) (*ClassStatistics, error) { + buf := &bytes.Buffer{} + buf.Write(data) + tcStats := &tcStats{} + if err := binary.Read(buf, native, tcStats); err != nil { + return nil, err + } + + stats := NewClassStatistics() + stats.Basic.Bytes = tcStats.Bytes + stats.Basic.Packets = tcStats.Packets + stats.Queue.Qlen = tcStats.Qlen + stats.Queue.Backlog = tcStats.Backlog + stats.Queue.Drops = tcStats.Drops + stats.Queue.Overlimits = tcStats.Overlimits + stats.RateEst.Bps = tcStats.Bps + stats.RateEst.Pps = tcStats.Pps + + return stats, nil +} + +func parseGnetStats(data []byte, gnetStats interface{}) error { + buf := &bytes.Buffer{} + buf.Write(data) + return binary.Read(buf, native, gnetStats) +} + +func parseTcStats2(data []byte) (*ClassStatistics, error) { + rtAttrs, err := nl.ParseRouteAttr(data) + if err != nil { + return nil, err + } + stats := NewClassStatistics() + for _, datum := range rtAttrs { + switch datum.Attr.Type { + case nl.TCA_STATS_BASIC: + if err := parseGnetStats(datum.Value, stats.Basic); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.Basic with: %v\n%s", + err, hex.Dump(datum.Value)) + } + case nl.TCA_STATS_QUEUE: + if err := parseGnetStats(datum.Value, stats.Queue); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.Queue with: %v\n%s", + err, hex.Dump(datum.Value)) + } + case nl.TCA_STATS_RATE_EST: + if err := parseGnetStats(datum.Value, stats.RateEst); err != nil { + return nil, fmt.Errorf("Failed to parse ClassStatistics.RateEst with: %v\n%s", + err, hex.Dump(datum.Value)) + } + } + } + + return stats, nil +} diff --git a/vendor/github.com/vishvananda/netlink/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/conntrack_linux.go index ecf0445659..03ea1b98fc 100644 --- a/vendor/github.com/vishvananda/netlink/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/conntrack_linux.go @@ -6,9 +6,10 @@ import ( "errors" "fmt" "net" - "syscall" + "time" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) // ConntrackTableType Conntrack table for the netlink operation @@ -22,11 +23,7 @@ const ( // https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/nfnetlink.h -> #define NFNL_SUBSYS_CTNETLINK_EXP 2 ConntrackExpectTable = 2 ) -const ( - // For Parsing Mark - TCP_PROTO = 6 - UDP_PROTO = 17 -) + const ( // backward compatibility with golang 1.6 which does not have io.SeekCurrent seekCurrent = 1 @@ -85,8 +82,8 @@ func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily) // conntrack -F [table] Flush table // The flush operation applies to all the family types func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error { - req := h.newConntrackRequest(table, syscall.AF_INET, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK) - _, err := req.Execute(syscall.NETLINK_NETFILTER, 0) + req := h.newConntrackRequest(table, unix.AF_INET, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) + _, err := req.Execute(unix.NETLINK_NETFILTER, 0) return err } @@ -102,10 +99,10 @@ func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFami for _, dataRaw := range res { flow := parseRawData(dataRaw) if match := filter.MatchConntrackFlow(flow); match { - req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK) + req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK) // skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already req2.AddRawData(dataRaw[4:]) - req2.Execute(syscall.NETLINK_NETFILTER, 0) + req2.Execute(unix.NETLINK_NETFILTER, 0) matched++ } } @@ -127,19 +124,21 @@ func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily } func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) { - req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, syscall.NLM_F_DUMP) - return req.Execute(syscall.NETLINK_NETFILTER, 0) + req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, unix.NLM_F_DUMP) + return req.Execute(unix.NETLINK_NETFILTER, 0) } // The full conntrack flow structure is very complicated and can be found in the file: // http://git.netfilter.org/libnetfilter_conntrack/tree/include/internal/object.h // For the time being, the structure below allows to parse and extract the base information of a flow type ipTuple struct { - SrcIP net.IP + Bytes uint64 DstIP net.IP + DstPort uint16 + Packets uint64 Protocol uint8 + SrcIP net.IP SrcPort uint16 - DstPort uint16 } type ConntrackFlow struct { @@ -147,15 +146,23 @@ type ConntrackFlow struct { Forward ipTuple Reverse ipTuple Mark uint32 + TimeStart uint64 + TimeStop uint64 + TimeOut uint32 } func (s *ConntrackFlow) String() string { // conntrack cmd output: - // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 mark=0 - return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d\tsrc=%s dst=%s sport=%d dport=%d mark=%d", + // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 + // start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec) + start := time.Unix(0, int64(s.TimeStart)) + stop := time.Unix(0, int64(s.TimeStop)) + timeout := int32(s.TimeOut) + return fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x start=%v stop=%v timeout=%d(sec)", nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol, - s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, - s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Mark) + s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes, + s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes, + s.Mark, start, stop, timeout) } // This method parse the ip tuple structure @@ -175,25 +182,43 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { tpl.DstIP = v } } - // Skip the next 4 bytes nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO - reader.Seek(4, seekCurrent) - _, t, _, v := parseNfAttrTLV(reader) + // Get total length of nested protocol-specific info. + _, _, protoInfoTotalLen := parseNfAttrTL(reader) + _, t, l, v := parseNfAttrTLV(reader) + // Track the number of bytes read. + protoInfoBytesRead := uint16(nl.SizeofNfattr) + l if t == nl.CTA_PROTO_NUM { tpl.Protocol = uint8(v[0]) } - // Skip some padding 3 bytes + // We only parse TCP & UDP headers. Skip the others. + if tpl.Protocol != 6 && tpl.Protocol != 17 { + // skip the rest + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol + } + // Skip 3 bytes of padding reader.Seek(3, seekCurrent) + protoInfoBytesRead += 3 for i := 0; i < 2; i++ { _, t, _ := parseNfAttrTL(reader) + protoInfoBytesRead += uint16(nl.SizeofNfattr) switch t { case nl.CTA_PROTO_SRC_PORT: parseBERaw16(reader, &tpl.SrcPort) + protoInfoBytesRead += 2 case nl.CTA_PROTO_DST_PORT: parseBERaw16(reader, &tpl.DstPort) + protoInfoBytesRead += 2 } - // Skip some padding 2 byte + // Skip 2 bytes of padding reader.Seek(2, seekCurrent) + protoInfoBytesRead += 2 } + // Skip any remaining/unknown parts of the message + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol } @@ -212,17 +237,77 @@ func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) { binary.Read(r, nl.NativeEndian(), &attrType) isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED attrType = attrType & (nl.NLA_F_NESTED - 1) - return isNested, attrType, len } +func skipNfAttrValue(r *bytes.Reader, len uint16) { + len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1) + r.Seek(int64(len), seekCurrent) +} + func parseBERaw16(r *bytes.Reader, v *uint16) { binary.Read(r, binary.BigEndian, v) } +func parseBERaw32(r *bytes.Reader, v *uint32) { + binary.Read(r, binary.BigEndian, v) +} + +func parseBERaw64(r *bytes.Reader, v *uint64) { + binary.Read(r, binary.BigEndian, v) +} + +func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { + for i := 0; i < 2; i++ { + switch _, t, _ := parseNfAttrTL(r); t { + case nl.CTA_COUNTERS_BYTES: + parseBERaw64(r, &bytes) + case nl.CTA_COUNTERS_PACKETS: + parseBERaw64(r, &packets) + default: + return + } + } + return +} + +// when the flow is alive, only the timestamp_start is returned in structure +func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) { + var numTimeStamps int + oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp + if readSize == uint16(oneItem) { + numTimeStamps = 1 + } else if readSize == 2*uint16(oneItem) { + numTimeStamps = 2 + } else { + return + } + for i := 0; i < numTimeStamps; i++ { + switch _, t, _ := parseNfAttrTL(r); t { + case nl.CTA_TIMESTAMP_START: + parseBERaw64(r, &tstart) + case nl.CTA_TIMESTAMP_STOP: + parseBERaw64(r, &tstop) + default: + return + } + } + return + +} + +func parseTimeOut(r *bytes.Reader) (ttimeout uint32) { + parseBERaw32(r, &ttimeout) + return +} + +func parseConnectionMark(r *bytes.Reader) (mark uint32) { + parseBERaw32(r, &mark) + return +} + func parseRawData(data []byte) *ConntrackFlow { s := &ConntrackFlow{} - var proto uint8 // First there is the Nfgenmsg header // consume only the family field reader := bytes.NewReader(data) @@ -238,36 +323,43 @@ func parseRawData(data []byte) *ConntrackFlow { // 4 bytes // flow information of the reverse flow for reader.Len() > 0 { - nested, t, l := parseNfAttrTL(reader) - if nested && t == nl.CTA_TUPLE_ORIG { - if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { - proto = parseIpTuple(reader, &s.Forward) + if nested, t, l := parseNfAttrTL(reader); nested { + switch t { + case nl.CTA_TUPLE_ORIG: + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + parseIpTuple(reader, &s.Forward) + } + case nl.CTA_TUPLE_REPLY: + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + parseIpTuple(reader, &s.Reverse) + } else { + // Header not recognized skip it + skipNfAttrValue(reader, l) + } + case nl.CTA_COUNTERS_ORIG: + s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader) + case nl.CTA_COUNTERS_REPLY: + s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader) + case nl.CTA_TIMESTAMP: + s.TimeStart, s.TimeStop = parseTimeStamp(reader, l) + case nl.CTA_PROTOINFO: + skipNfAttrValue(reader, l) + default: + skipNfAttrValue(reader, l) } - } else if nested && t == nl.CTA_TUPLE_REPLY { - if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { - parseIpTuple(reader, &s.Reverse) - - // Got all the useful information stop parsing - break - } else { - // Header not recognized skip it - reader.Seek(int64(l), seekCurrent) + } else { + switch t { + case nl.CTA_MARK: + s.Mark = parseConnectionMark(reader) + case nl.CTA_TIMEOUT: + s.TimeOut = parseTimeOut(reader) + case nl.CTA_STATUS, nl.CTA_USE, nl.CTA_ID: + skipNfAttrValue(reader, l) + default: + skipNfAttrValue(reader, l) } } } - if proto == TCP_PROTO { - reader.Seek(64, seekCurrent) - _, t, _, v := parseNfAttrTLV(reader) - if t == nl.CTA_MARK { - s.Mark = uint32(v[3]) - } - } else if proto == UDP_PROTO { - reader.Seek(16, seekCurrent) - _, t, _, v := parseNfAttrTLV(reader) - if t == nl.CTA_MARK { - s.Mark = uint32(v[3]) - } - } return s } @@ -285,7 +377,7 @@ func parseRawData(data []byte) *ConntrackFlow { // Common parameters and options: // -s, --src, --orig-src ip Source address from original direction // -d, --dst, --orig-dst ip Destination address from original direction -// -r, --reply-src ip Source addres from reply direction +// -r, --reply-src ip Source address from reply direction // -q, --reply-dst ip Destination address from reply direction // -p, --protonum proto Layer 4 Protocol, eg. 'tcp' // -f, --family proto Layer 3 Protocol, eg. 'ipv6' @@ -298,15 +390,25 @@ func parseRawData(data []byte) *ConntrackFlow { // --mask-src ip Source mask address // --mask-dst ip Destination mask address +// Layer 4 Protocol common parameters and options: +// TCP, UDP, SCTP, UDPLite and DCCP +// --sport, --orig-port-src port Source port in original direction +// --dport, --orig-port-dst port Destination port in original direction + // Filter types type ConntrackFilterType uint8 const ( - ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction - ConntrackOrigDstIP // -orig-dst ip Destination address from original direction - ConntrackNatSrcIP // -src-nat ip Source NAT ip - ConntrackNatDstIP // -dst-nat ip Destination NAT ip - ConntrackNatAnyIP // -any-nat ip Source or destination NAT ip + ConntrackOrigSrcIP = iota // -orig-src ip Source address from original direction + ConntrackOrigDstIP // -orig-dst ip Destination address from original direction + ConntrackReplySrcIP // --reply-src ip Reply Source IP + ConntrackReplyDstIP // --reply-dst ip Reply Destination IP + ConntrackReplyAnyIP // Match source or destination reply IP + ConntrackOrigSrcPort // --orig-port-src port Source port in original direction + ConntrackOrigDstPort // --orig-port-dst port Destination port in original direction + ConntrackNatSrcIP = ConntrackReplySrcIP // deprecated use instead ConntrackReplySrcIP + ConntrackNatDstIP = ConntrackReplyDstIP // deprecated use instead ConntrackReplyDstIP + ConntrackNatAnyIP = ConntrackReplyAnyIP // deprecated use instead ConntrackReplyAnyIP ) type CustomConntrackFilter interface { @@ -316,53 +418,117 @@ type CustomConntrackFilter interface { } type ConntrackFilter struct { - ipFilter map[ConntrackFilterType]net.IP + ipNetFilter map[ConntrackFilterType]*net.IPNet + portFilter map[ConntrackFilterType]uint16 + protoFilter uint8 +} + +// AddIPNet adds a IP subnet to the conntrack filter +func (f *ConntrackFilter) AddIPNet(tp ConntrackFilterType, ipNet *net.IPNet) error { + if ipNet == nil { + return fmt.Errorf("Filter attribute empty") + } + if f.ipNetFilter == nil { + f.ipNetFilter = make(map[ConntrackFilterType]*net.IPNet) + } + if _, ok := f.ipNetFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.ipNetFilter[tp] = ipNet + return nil } // AddIP adds an IP to the conntrack filter func (f *ConntrackFilter) AddIP(tp ConntrackFilterType, ip net.IP) error { - if f.ipFilter == nil { - f.ipFilter = make(map[ConntrackFilterType]net.IP) + if ip == nil { + return fmt.Errorf("Filter attribute empty") } - if _, ok := f.ipFilter[tp]; ok { + return f.AddIPNet(tp, NewIPNet(ip)) +} + +// AddPort adds a Port to the conntrack filter if the Layer 4 protocol allows it +func (f *ConntrackFilter) AddPort(tp ConntrackFilterType, port uint16) error { + switch f.protoFilter { + // TCP, UDP, DCCP, SCTP, UDPLite + case 6, 17, 33, 132, 136: + default: + return fmt.Errorf("Filter attribute not available without a valid Layer 4 protocol: %d", f.protoFilter) + } + + if f.portFilter == nil { + f.portFilter = make(map[ConntrackFilterType]uint16) + } + if _, ok := f.portFilter[tp]; ok { + return errors.New("Filter attribute already present") + } + f.portFilter[tp] = port + return nil +} + +// AddProtocol adds the Layer 4 protocol to the conntrack filter +func (f *ConntrackFilter) AddProtocol(proto uint8) error { + if f.protoFilter != 0 { return errors.New("Filter attribute already present") } - f.ipFilter[tp] = ip + f.protoFilter = proto return nil } // MatchConntrackFlow applies the filter to the flow and returns true if the flow matches the filter // false otherwise func (f *ConntrackFilter) MatchConntrackFlow(flow *ConntrackFlow) bool { - if len(f.ipFilter) == 0 { + if len(f.ipNetFilter) == 0 && len(f.portFilter) == 0 && f.protoFilter == 0 { // empty filter always not match return false } - match := true - // -orig-src ip Source address from original direction - if elem, found := f.ipFilter[ConntrackOrigSrcIP]; found { - match = match && elem.Equal(flow.Forward.SrcIP) + // -p, --protonum proto Layer 4 Protocol, eg. 'tcp' + if f.protoFilter != 0 && flow.Forward.Protocol != f.protoFilter { + // different Layer 4 protocol always not match + return false } - // -orig-dst ip Destination address from original direction - if elem, found := f.ipFilter[ConntrackOrigDstIP]; match && found { - match = match && elem.Equal(flow.Forward.DstIP) - } + match := true - // -src-nat ip Source NAT ip - if elem, found := f.ipFilter[ConntrackNatSrcIP]; match && found { - match = match && elem.Equal(flow.Reverse.SrcIP) - } + // IP conntrack filter + if len(f.ipNetFilter) > 0 { + // -orig-src ip Source address from original direction + if elem, found := f.ipNetFilter[ConntrackOrigSrcIP]; found { + match = match && elem.Contains(flow.Forward.SrcIP) + } + + // -orig-dst ip Destination address from original direction + if elem, found := f.ipNetFilter[ConntrackOrigDstIP]; match && found { + match = match && elem.Contains(flow.Forward.DstIP) + } + + // -src-nat ip Source NAT ip + if elem, found := f.ipNetFilter[ConntrackReplySrcIP]; match && found { + match = match && elem.Contains(flow.Reverse.SrcIP) + } + + // -dst-nat ip Destination NAT ip + if elem, found := f.ipNetFilter[ConntrackReplyDstIP]; match && found { + match = match && elem.Contains(flow.Reverse.DstIP) + } - // -dst-nat ip Destination NAT ip - if elem, found := f.ipFilter[ConntrackNatDstIP]; match && found { - match = match && elem.Equal(flow.Reverse.DstIP) + // Match source or destination reply IP + if elem, found := f.ipNetFilter[ConntrackReplyAnyIP]; match && found { + match = match && (elem.Contains(flow.Reverse.SrcIP) || elem.Contains(flow.Reverse.DstIP)) + } } - // -any-nat ip Source or destination NAT ip - if elem, found := f.ipFilter[ConntrackNatAnyIP]; match && found { - match = match && (elem.Equal(flow.Reverse.SrcIP) || elem.Equal(flow.Reverse.DstIP)) + // Layer 4 Port filter + if len(f.portFilter) > 0 { + // -orig-port-src port Source port from original direction + if elem, found := f.portFilter[ConntrackOrigSrcPort]; match && found { + match = match && elem == flow.Forward.SrcPort + } + + // -orig-port-dst port Destination port from original direction + if elem, found := f.portFilter[ConntrackOrigDstPort]; match && found { + match = match && elem == flow.Forward.DstPort + } } return match diff --git a/vendor/github.com/vishvananda/netlink/devlink_linux.go b/vendor/github.com/vishvananda/netlink/devlink_linux.go new file mode 100644 index 0000000000..358b232c6c --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/devlink_linux.go @@ -0,0 +1,728 @@ +package netlink + +import ( + "fmt" + "net" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// DevlinkDevEswitchAttr represents device's eswitch attributes +type DevlinkDevEswitchAttr struct { + Mode string + InlineMode string + EncapMode string +} + +// DevlinkDevAttrs represents device attributes +type DevlinkDevAttrs struct { + Eswitch DevlinkDevEswitchAttr +} + +// DevlinkDevice represents device and its attributes +type DevlinkDevice struct { + BusName string + DeviceName string + Attrs DevlinkDevAttrs +} + +// DevlinkPortFn represents port function and its attributes +type DevlinkPortFn struct { + HwAddr net.HardwareAddr + State uint8 + OpState uint8 +} + +// DevlinkPortFnSetAttrs represents attributes to set +type DevlinkPortFnSetAttrs struct { + FnAttrs DevlinkPortFn + HwAddrValid bool + StateValid bool +} + +// DevlinkPort represents port and its attributes +type DevlinkPort struct { + BusName string + DeviceName string + PortIndex uint32 + PortType uint16 + NetdeviceName string + NetdevIfIndex uint32 + RdmaDeviceName string + PortFlavour uint16 + Fn *DevlinkPortFn +} + +type DevLinkPortAddAttrs struct { + Controller uint32 + SfNumber uint32 + PortIndex uint32 + PfNumber uint16 + SfNumberValid bool + PortIndexValid bool + ControllerValid bool +} + +// DevlinkDeviceInfo represents devlink info +type DevlinkDeviceInfo struct { + Driver string + SerialNumber string + BoardID string + FwApp string + FwAppBoundleID string + FwAppName string + FwBoundleID string + FwMgmt string + FwMgmtAPI string + FwMgmtBuild string + FwNetlist string + FwNetlistBuild string + FwPsidAPI string + FwUndi string +} + +func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) { + devices := make([]*DevlinkDevice, 0, len(msgs)) + for _, m := range msgs { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + dev := &DevlinkDevice{} + if err = dev.parseAttributes(attrs); err != nil { + return nil, err + } + devices = append(devices, dev) + } + return devices, nil +} + +func eswitchStringToMode(modeName string) (uint16, error) { + if modeName == "legacy" { + return nl.DEVLINK_ESWITCH_MODE_LEGACY, nil + } else if modeName == "switchdev" { + return nl.DEVLINK_ESWITCH_MODE_SWITCHDEV, nil + } else { + return 0xffff, fmt.Errorf("invalid switchdev mode") + } +} + +func parseEswitchMode(mode uint16) string { + var eswitchMode = map[uint16]string{ + nl.DEVLINK_ESWITCH_MODE_LEGACY: "legacy", + nl.DEVLINK_ESWITCH_MODE_SWITCHDEV: "switchdev", + } + if eswitchMode[mode] == "" { + return "unknown" + } else { + return eswitchMode[mode] + } +} + +func parseEswitchInlineMode(inlinemode uint8) string { + var eswitchInlineMode = map[uint8]string{ + nl.DEVLINK_ESWITCH_INLINE_MODE_NONE: "none", + nl.DEVLINK_ESWITCH_INLINE_MODE_LINK: "link", + nl.DEVLINK_ESWITCH_INLINE_MODE_NETWORK: "network", + nl.DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT: "transport", + } + if eswitchInlineMode[inlinemode] == "" { + return "unknown" + } else { + return eswitchInlineMode[inlinemode] + } +} + +func parseEswitchEncapMode(encapmode uint8) string { + var eswitchEncapMode = map[uint8]string{ + nl.DEVLINK_ESWITCH_ENCAP_MODE_NONE: "disable", + nl.DEVLINK_ESWITCH_ENCAP_MODE_BASIC: "enable", + } + if eswitchEncapMode[encapmode] == "" { + return "unknown" + } else { + return eswitchEncapMode[encapmode] + } +} + +func (d *DevlinkDevice) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + for _, a := range attrs { + switch a.Attr.Type { + case nl.DEVLINK_ATTR_BUS_NAME: + d.BusName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_DEV_NAME: + d.DeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_ESWITCH_MODE: + d.Attrs.Eswitch.Mode = parseEswitchMode(native.Uint16(a.Value)) + case nl.DEVLINK_ATTR_ESWITCH_INLINE_MODE: + d.Attrs.Eswitch.InlineMode = parseEswitchInlineMode(uint8(a.Value[0])) + case nl.DEVLINK_ATTR_ESWITCH_ENCAP_MODE: + d.Attrs.Eswitch.EncapMode = parseEswitchEncapMode(uint8(a.Value[0])) + } + } + return nil +} + +func (dev *DevlinkDevice) parseEswitchAttrs(msgs [][]byte) { + m := msgs[0] + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return + } + dev.parseAttributes(attrs) +} + +func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) { + msg := &nl.Genlmsg{ + Command: nl.DEVLINK_CMD_ESWITCH_GET, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK) + req.AddData(msg) + + b := make([]byte, len(dev.BusName)+1) + copy(b, dev.BusName) + data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b) + req.AddData(data) + + b = make([]byte, len(dev.DeviceName)+1) + copy(b, dev.DeviceName) + data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b) + req.AddData(data) + + msgs, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return + } + dev.parseEswitchAttrs(msgs) +} + +// DevLinkGetDeviceList provides a pointer to devlink devices and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetDeviceList() ([]*DevlinkDevice, error) { + f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME) + if err != nil { + return nil, err + } + msg := &nl.Genlmsg{ + Command: nl.DEVLINK_CMD_GET, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(f.ID), + unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP) + req.AddData(msg) + msgs, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + devices, err := parseDevLinkDeviceList(msgs) + if err != nil { + return nil, err + } + for _, d := range devices { + h.getEswitchAttrs(f, d) + } + return devices, nil +} + +// DevLinkGetDeviceList provides a pointer to devlink devices and nil error, +// otherwise returns an error code. +func DevLinkGetDeviceList() ([]*DevlinkDevice, error) { + return pkgHandle.DevLinkGetDeviceList() +} + +func parseDevlinkDevice(msgs [][]byte) (*DevlinkDevice, error) { + m := msgs[0] + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + dev := &DevlinkDevice{} + if err = dev.parseAttributes(attrs); err != nil { + return nil, err + } + return dev, nil +} + +func (h *Handle) createCmdReq(cmd uint8, bus string, device string) (*GenlFamily, *nl.NetlinkRequest, error) { + f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME) + if err != nil { + return nil, nil, err + } + + msg := &nl.Genlmsg{ + Command: cmd, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(f.ID), + unix.NLM_F_REQUEST|unix.NLM_F_ACK) + req.AddData(msg) + + b := make([]byte, len(bus)+1) + copy(b, bus) + data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b) + req.AddData(data) + + b = make([]byte, len(device)+1) + copy(b, device) + data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b) + req.AddData(data) + + return f, req, nil +} + +// DevlinkGetDeviceByName provides a pointer to devlink device and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetDeviceByName(Bus string, Device string) (*DevlinkDevice, error) { + f, req, err := h.createCmdReq(nl.DEVLINK_CMD_GET, Bus, Device) + if err != nil { + return nil, err + } + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + dev, err := parseDevlinkDevice(respmsg) + if err == nil { + h.getEswitchAttrs(f, dev) + } + return dev, err +} + +// DevlinkGetDeviceByName provides a pointer to devlink device and nil error, +// otherwise returns an error code. +func DevLinkGetDeviceByName(Bus string, Device string) (*DevlinkDevice, error) { + return pkgHandle.DevLinkGetDeviceByName(Bus, Device) +} + +// DevLinkSetEswitchMode sets eswitch mode if able to set successfully or +// returns an error code. +// Equivalent to: `devlink dev eswitch set $dev mode switchdev` +// Equivalent to: `devlink dev eswitch set $dev mode legacy` +func (h *Handle) DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error { + mode, err := eswitchStringToMode(NewMode) + if err != nil { + return err + } + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_ESWITCH_SET, Dev.BusName, Dev.DeviceName) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_ESWITCH_MODE, nl.Uint16Attr(mode))) + + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevLinkSetEswitchMode sets eswitch mode if able to set successfully or +// returns an error code. +// Equivalent to: `devlink dev eswitch set $dev mode switchdev` +// Equivalent to: `devlink dev eswitch set $dev mode legacy` +func DevLinkSetEswitchMode(Dev *DevlinkDevice, NewMode string) error { + return pkgHandle.DevLinkSetEswitchMode(Dev, NewMode) +} + +func (port *DevlinkPort) parseAttributes(attrs []syscall.NetlinkRouteAttr) error { + for _, a := range attrs { + switch a.Attr.Type { + case nl.DEVLINK_ATTR_BUS_NAME: + port.BusName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_DEV_NAME: + port.DeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_INDEX: + port.PortIndex = native.Uint32(a.Value) + case nl.DEVLINK_ATTR_PORT_TYPE: + port.PortType = native.Uint16(a.Value) + case nl.DEVLINK_ATTR_PORT_NETDEV_NAME: + port.NetdeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_NETDEV_IFINDEX: + port.NetdevIfIndex = native.Uint32(a.Value) + case nl.DEVLINK_ATTR_PORT_IBDEV_NAME: + port.RdmaDeviceName = string(a.Value[:len(a.Value)-1]) + case nl.DEVLINK_ATTR_PORT_FLAVOUR: + port.PortFlavour = native.Uint16(a.Value) + case nl.DEVLINK_ATTR_PORT_FUNCTION: + port.Fn = &DevlinkPortFn{} + for nested := range nl.ParseAttributes(a.Value) { + switch nested.Type { + case nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR: + port.Fn.HwAddr = nested.Value[:] + case nl.DEVLINK_PORT_FN_ATTR_STATE: + port.Fn.State = uint8(nested.Value[0]) + case nl.DEVLINK_PORT_FN_ATTR_OPSTATE: + port.Fn.OpState = uint8(nested.Value[0]) + } + } + } + } + return nil +} + +func parseDevLinkAllPortList(msgs [][]byte) ([]*DevlinkPort, error) { + ports := make([]*DevlinkPort, 0, len(msgs)) + for _, m := range msgs { + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + port := &DevlinkPort{} + if err = port.parseAttributes(attrs); err != nil { + return nil, err + } + ports = append(ports, port) + } + return ports, nil +} + +// DevLinkGetPortList provides a pointer to devlink ports and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetAllPortList() ([]*DevlinkPort, error) { + f, err := h.GenlFamilyGet(nl.GENL_DEVLINK_NAME) + if err != nil { + return nil, err + } + msg := &nl.Genlmsg{ + Command: nl.DEVLINK_CMD_PORT_GET, + Version: nl.GENL_DEVLINK_VERSION, + } + req := h.newNetlinkRequest(int(f.ID), + unix.NLM_F_REQUEST|unix.NLM_F_ACK|unix.NLM_F_DUMP) + req.AddData(msg) + msgs, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + ports, err := parseDevLinkAllPortList(msgs) + if err != nil { + return nil, err + } + return ports, nil +} + +// DevLinkGetPortList provides a pointer to devlink ports and nil error, +// otherwise returns an error code. +func DevLinkGetAllPortList() ([]*DevlinkPort, error) { + return pkgHandle.DevLinkGetAllPortList() +} + +func parseDevlinkPortMsg(msgs [][]byte) (*DevlinkPort, error) { + m := msgs[0] + attrs, err := nl.ParseRouteAttr(m[nl.SizeofGenlmsg:]) + if err != nil { + return nil, err + } + port := &DevlinkPort{} + if err = port.parseAttributes(attrs); err != nil { + return nil, err + } + return port, nil +} + +// DevLinkGetPortByIndexprovides a pointer to devlink device and nil error, +// otherwise returns an error code. +func (h *Handle) DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { + + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_GET, Bus, Device) + if err != nil { + return nil, err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + port, err := parseDevlinkPortMsg(respmsg) + return port, err +} + +// DevLinkGetPortByIndex provides a pointer to devlink portand nil error, +// otherwise returns an error code. +func DevLinkGetPortByIndex(Bus string, Device string, PortIndex uint32) (*DevlinkPort, error) { + return pkgHandle.DevLinkGetPortByIndex(Bus, Device, PortIndex) +} + +// DevLinkPortAdd adds a devlink port and returns a port on success +// otherwise returns nil port and an error code. +func (h *Handle) DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_NEW, Bus, Device) + if err != nil { + return nil, err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FLAVOUR, nl.Uint16Attr(Flavour))) + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_PF_NUMBER, nl.Uint16Attr(Attrs.PfNumber))) + if Flavour == nl.DEVLINK_PORT_FLAVOUR_PCI_SF && Attrs.SfNumberValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_PCI_SF_NUMBER, nl.Uint32Attr(Attrs.SfNumber))) + } + if Attrs.PortIndexValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(Attrs.PortIndex))) + } + if Attrs.ControllerValid { + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, nl.Uint32Attr(Attrs.Controller))) + } + respmsg, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + port, err := parseDevlinkPortMsg(respmsg) + return port, err +} + +// DevLinkPortAdd adds a devlink port and returns a port on success +// otherwise returns nil port and an error code. +func DevLinkPortAdd(Bus string, Device string, Flavour uint16, Attrs DevLinkPortAddAttrs) (*DevlinkPort, error) { + return pkgHandle.DevLinkPortAdd(Bus, Device, Flavour, Attrs) +} + +// DevLinkPortDel deletes a devlink port and returns success or error code. +func (h *Handle) DevLinkPortDel(Bus string, Device string, PortIndex uint32) error { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_DEL, Bus, Device) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevLinkPortDel deletes a devlink port and returns success or error code. +func DevLinkPortDel(Bus string, Device string, PortIndex uint32) error { + return pkgHandle.DevLinkPortDel(Bus, Device, PortIndex) +} + +// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask. +// It returns 0 on success or error code. +func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_PORT_SET, Bus, Device) + if err != nil { + return err + } + + req.AddData(nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_INDEX, nl.Uint32Attr(PortIndex))) + + fnAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FUNCTION|unix.NLA_F_NESTED, nil) + + if FnAttrs.HwAddrValid { + fnAttr.AddRtAttr(nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, []byte(FnAttrs.FnAttrs.HwAddr)) + } + + if FnAttrs.StateValid { + fnAttr.AddRtAttr(nl.DEVLINK_PORT_FN_ATTR_STATE, nl.Uint8Attr(FnAttrs.FnAttrs.State)) + } + req.AddData(fnAttr) + + _, err = req.Execute(unix.NETLINK_GENERIC, 0) + return err +} + +// DevlinkPortFnSet sets one or more port function attributes specified by the attribute mask. +// It returns 0 on success or error code. +func DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { + return pkgHandle.DevlinkPortFnSet(Bus, Device, PortIndex, FnAttrs) +} + +// devlinkInfoGetter is function that is responsible for getting devlink info message +// this is introduced for test purpose +type devlinkInfoGetter func(bus, device string) ([]byte, error) + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByName(Bus string, Device string, getInfoMsg devlinkInfoGetter) (*DevlinkDeviceInfo, error) { + info, err := h.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, getInfoMsg) + if err != nil { + return nil, err + } + + return parseInfoData(info), nil +} + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByName(Bus string, Device string) (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string, getInfoMsg devlinkInfoGetter) (map[string]string, error) { + response, err := getInfoMsg(Bus, Device) + if err != nil { + return nil, err + } + + info, err := parseInfoMsg(response) + if err != nil { + return nil, err + } + + return info, nil +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string) (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfo returns devlink info for target device, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfo() (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfoAsMap returns devlink info for target device as a map, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfoAsMap() (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +func (h *Handle) getDevlinkInfoMsg(bus, device string) ([]byte, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_INFO_GET, bus, device) + if err != nil { + return nil, err + } + + response, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + + if len(response) < 1 { + return nil, fmt.Errorf("getDevlinkInfoMsg: message too short") + } + + return response[0], nil +} + +func parseInfoMsg(msg []byte) (map[string]string, error) { + if len(msg) < nl.SizeofGenlmsg { + return nil, fmt.Errorf("parseInfoMsg: message too short") + } + + info := make(map[string]string) + err := collectInfoData(msg[nl.SizeofGenlmsg:], info) + + if err != nil { + return nil, err + } + + return info, nil +} + +func collectInfoData(msg []byte, data map[string]string) error { + attrs, err := nl.ParseRouteAttr(msg) + if err != nil { + return err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_DRIVER_NAME: + data["driver"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_SERIAL_NUMBER: + data["serialNumber"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_RUNNING, nl.DEVLINK_ATTR_INFO_VERSION_FIXED, + nl.DEVLINK_ATTR_INFO_VERSION_STORED: + key, value, err := getNestedInfoData(attr.Value) + if err != nil { + return err + } + data[key] = value + } + } + + if len(data) == 0 { + return fmt.Errorf("collectInfoData: could not read attributes") + } + + return nil +} + +func getNestedInfoData(msg []byte) (string, string, error) { + nestedAttrs, err := nl.ParseRouteAttr(msg) + + var key, value string + + if err != nil { + return "", "", err + } + + if len(nestedAttrs) != 2 { + return "", "", fmt.Errorf("getNestedInfoData: too few attributes in nested structure") + } + + for _, nestedAttr := range nestedAttrs { + switch nestedAttr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_VERSION_NAME: + key = parseInfoValue(nestedAttr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_VALUE: + value = parseInfoValue(nestedAttr.Value) + } + } + + if key == "" { + return "", "", fmt.Errorf("getNestedInfoData: key not found") + } + + if value == "" { + return "", "", fmt.Errorf("getNestedInfoData: value not found") + } + + return key, value, nil +} + +func parseInfoData(data map[string]string) *DevlinkDeviceInfo { + info := new(DevlinkDeviceInfo) + for key, value := range data { + switch key { + case "driver": + info.Driver = value + case "serialNumber": + info.SerialNumber = value + case "board.id": + info.BoardID = value + case "fw.app": + info.FwApp = value + case "fw.app.bundle_id": + info.FwAppBoundleID = value + case "fw.app.name": + info.FwAppName = value + case "fw.bundle_id": + info.FwBoundleID = value + case "fw.mgmt": + info.FwMgmt = value + case "fw.mgmt.api": + info.FwMgmtAPI = value + case "fw.mgmt.build": + info.FwMgmtBuild = value + case "fw.netlist": + info.FwNetlist = value + case "fw.netlist.build": + info.FwNetlistBuild = value + case "fw.psid.api": + info.FwPsidAPI = value + case "fw.undi": + info.FwUndi = value + } + } + return info +} + +func parseInfoValue(value []byte) string { + v := strings.ReplaceAll(string(value), "\x00", "") + return strings.TrimSpace(v) +} diff --git a/vendor/github.com/vishvananda/netlink/filter.go b/vendor/github.com/vishvananda/netlink/filter.go index 938b28b0b0..2d798b0fbb 100644 --- a/vendor/github.com/vishvananda/netlink/filter.go +++ b/vendor/github.com/vishvananda/netlink/filter.go @@ -2,8 +2,7 @@ package netlink import ( "fmt" - - "github.com/vishvananda/netlink/nl" + "net" ) type Filter interface { @@ -19,7 +18,7 @@ type FilterAttrs struct { Handle uint32 Parent uint32 Priority uint16 // lower is higher priority - Protocol uint16 // syscall.ETH_P_* + Protocol uint16 // unix.ETH_P_* } func (q FilterAttrs) String() string { @@ -137,6 +136,60 @@ func (action *BpfAction) Attrs() *ActionAttrs { return &action.ActionAttrs } +type ConnmarkAction struct { + ActionAttrs + Zone uint16 +} + +func (action *ConnmarkAction) Type() string { + return "connmark" +} + +func (action *ConnmarkAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewConnmarkAction() *ConnmarkAction { + return &ConnmarkAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + +type CsumUpdateFlags uint32 + +const ( + TCA_CSUM_UPDATE_FLAG_IPV4HDR CsumUpdateFlags = 1 + TCA_CSUM_UPDATE_FLAG_ICMP CsumUpdateFlags = 2 + TCA_CSUM_UPDATE_FLAG_IGMP CsumUpdateFlags = 4 + TCA_CSUM_UPDATE_FLAG_TCP CsumUpdateFlags = 8 + TCA_CSUM_UPDATE_FLAG_UDP CsumUpdateFlags = 16 + TCA_CSUM_UPDATE_FLAG_UDPLITE CsumUpdateFlags = 32 + TCA_CSUM_UPDATE_FLAG_SCTP CsumUpdateFlags = 64 +) + +type CsumAction struct { + ActionAttrs + UpdateFlags CsumUpdateFlags +} + +func (action *CsumAction) Type() string { + return "csum" +} + +func (action *CsumAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewCsumAction() *CsumAction { + return &CsumAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + type MirredAct uint8 func (a MirredAct) String() string { @@ -184,71 +237,125 @@ func NewMirredAction(redirIndex int) *MirredAction { } } -// Constants used in TcU32Sel.Flags. +type TunnelKeyAct int8 + const ( - TC_U32_TERMINAL = nl.TC_U32_TERMINAL - TC_U32_OFFSET = nl.TC_U32_OFFSET - TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET - TC_U32_EAT = nl.TC_U32_EAT + TCA_TUNNEL_KEY_SET TunnelKeyAct = 1 // set tunnel key + TCA_TUNNEL_KEY_UNSET TunnelKeyAct = 2 // unset tunnel key ) -// Sel of the U32 filters that contains multiple TcU32Key. This is the copy -// and the frontend representation of nl.TcU32Sel. It is serialized into canonical -// nl.TcU32Sel with the appropriate endianness. -type TcU32Sel struct { - Flags uint8 - Offshift uint8 - Nkeys uint8 - Pad uint8 - Offmask uint16 - Off uint16 - Offoff int16 - Hoff int16 - Hmask uint32 - Keys []TcU32Key -} - -// TcU32Key contained of Sel in the U32 filters. This is the copy and the frontend -// representation of nl.TcU32Key. It is serialized into chanonical nl.TcU32Sel -// with the appropriate endianness. -type TcU32Key struct { - Mask uint32 - Val uint32 - Off int32 - OffMask int32 +type TunnelKeyAction struct { + ActionAttrs + Action TunnelKeyAct + SrcAddr net.IP + DstAddr net.IP + KeyID uint32 + DestPort uint16 } -// U32 filters on many packet related properties -type U32 struct { +func (action *TunnelKeyAction) Type() string { + return "tunnel_key" +} + +func (action *TunnelKeyAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewTunnelKeyAction() *TunnelKeyAction { + return &TunnelKeyAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + +type SkbEditAction struct { + ActionAttrs + QueueMapping *uint16 + PType *uint16 + Priority *uint32 + Mark *uint32 +} + +func (action *SkbEditAction) Type() string { + return "skbedit" +} + +func (action *SkbEditAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewSkbEditAction() *SkbEditAction { + return &SkbEditAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + +type PoliceAction struct { + ActionAttrs + Rate uint32 // in byte per second + Burst uint32 // in byte + RCellLog int + Mtu uint32 + Mpu uint16 // in byte + PeakRate uint32 // in byte per second + PCellLog int + AvRate uint32 // in byte per second + Overhead uint16 + LinkLayer int + ExceedAction TcPolAct + NotExceedAction TcPolAct +} + +func (action *PoliceAction) Type() string { + return "police" +} + +func (action *PoliceAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewPoliceAction() *PoliceAction { + return &PoliceAction{ + RCellLog: -1, + PCellLog: -1, + LinkLayer: 1, // ETHERNET + ExceedAction: TC_POLICE_RECLASSIFY, + NotExceedAction: TC_POLICE_OK, + } +} + +// MatchAll filters match all packets +type MatchAll struct { FilterAttrs - ClassId uint32 - RedirIndex int - Sel *TcU32Sel - Actions []Action + ClassId uint32 + Actions []Action } -func (filter *U32) Attrs() *FilterAttrs { +func (filter *MatchAll) Attrs() *FilterAttrs { return &filter.FilterAttrs } -func (filter *U32) Type() string { - return "u32" +func (filter *MatchAll) Type() string { + return "matchall" +} + +type FwFilter struct { + FilterAttrs + ClassId uint32 + InDev string + Mask uint32 + Police *PoliceAction +} + +func (filter *FwFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs } -type FilterFwAttrs struct { - ClassId uint32 - InDev string - Mask uint32 - Index uint32 - Buffer uint32 - Mtu uint32 - Mpu uint16 - Rate uint32 - AvRate uint32 - PeakRate uint32 - Action TcPolAct - Overhead uint16 - LinkLayer int +func (filter *FwFilter) Type() string { + return "fw" } type BpfFilter struct { @@ -257,6 +364,8 @@ type BpfFilter struct { Fd int Name string DirectAction bool + Id int + Tag string } func (filter *BpfFilter) Type() string { diff --git a/vendor/github.com/vishvananda/netlink/filter_linux.go b/vendor/github.com/vishvananda/netlink/filter_linux.go index dc0f90af88..4c6d1cf7d7 100644 --- a/vendor/github.com/vishvananda/netlink/filter_linux.go +++ b/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -3,82 +3,177 @@ package netlink import ( "bytes" "encoding/binary" + "encoding/hex" "errors" "fmt" + "net" "syscall" - "unsafe" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) -// Fw filter filters on firewall marks -// NOTE: this is in filter_linux because it refers to nl.TcPolice which -// is defined in nl/tc_linux.go -type Fw struct { +// Constants used in TcU32Sel.Flags. +const ( + TC_U32_TERMINAL = nl.TC_U32_TERMINAL + TC_U32_OFFSET = nl.TC_U32_OFFSET + TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET + TC_U32_EAT = nl.TC_U32_EAT +) + +// Sel of the U32 filters that contains multiple TcU32Key. This is the type +// alias and the frontend representation of nl.TcU32Sel. It is serialized into +// canonical nl.TcU32Sel with the appropriate endianness. +type TcU32Sel = nl.TcU32Sel + +// TcU32Key contained of Sel in the U32 filters. This is the type alias and the +// frontend representation of nl.TcU32Key. It is serialized into chanonical +// nl.TcU32Sel with the appropriate endianness. +type TcU32Key = nl.TcU32Key + +// U32 filters on many packet related properties +type U32 struct { FilterAttrs - ClassId uint32 - // TODO remove nl type from interface - Police nl.TcPolice - InDev string - // TODO Action - Mask uint32 - AvRate uint32 - Rtab [256]uint32 - Ptab [256]uint32 -} - -func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) { - var rtab [256]uint32 - var ptab [256]uint32 - rcellLog := -1 - pcellLog := -1 - avrate := fattrs.AvRate / 8 - police := nl.TcPolice{} - police.Rate.Rate = fattrs.Rate / 8 - police.PeakRate.Rate = fattrs.PeakRate / 8 - buffer := fattrs.Buffer - linklayer := nl.LINKLAYER_ETHERNET + ClassId uint32 + Divisor uint32 // Divisor MUST be power of 2. + Hash uint32 + Link uint32 + RedirIndex int + Sel *TcU32Sel + Actions []Action +} - if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC { - linklayer = fattrs.LinkLayer - } +func (filter *U32) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} - police.Action = int32(fattrs.Action) - if police.Rate.Rate != 0 { - police.Rate.Mpu = fattrs.Mpu - police.Rate.Overhead = fattrs.Overhead - if CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer) < 0 { - return nil, errors.New("TBF: failed to calculate rate table") - } - police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer))) +func (filter *U32) Type() string { + return "u32" +} + +type Flower struct { + FilterAttrs + DestIP net.IP + DestIPMask net.IPMask + SrcIP net.IP + SrcIPMask net.IPMask + EthType uint16 + EncDestIP net.IP + EncDestIPMask net.IPMask + EncSrcIP net.IP + EncSrcIPMask net.IPMask + EncDestPort uint16 + EncKeyId uint32 + + Actions []Action +} + +func (filter *Flower) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *Flower) Type() string { + return "flower" +} + +func (filter *Flower) encodeIP(parent *nl.RtAttr, ip net.IP, mask net.IPMask, v4Type, v6Type int, v4MaskType, v6MaskType int) { + ipType := v4Type + maskType := v4MaskType + + encodeMask := mask + if mask == nil { + encodeMask = net.CIDRMask(32, 32) } - police.Mtu = fattrs.Mtu - if police.PeakRate.Rate != 0 { - police.PeakRate.Mpu = fattrs.Mpu - police.PeakRate.Overhead = fattrs.Overhead - if CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer) < 0 { - return nil, errors.New("POLICE: failed to calculate peak rate table") + v4IP := ip.To4() + if v4IP == nil { + ipType = v6Type + maskType = v6MaskType + if mask == nil { + encodeMask = net.CIDRMask(128, 128) } + } else { + ip = v4IP } - return &Fw{ - FilterAttrs: attrs, - ClassId: fattrs.ClassId, - InDev: fattrs.InDev, - Mask: fattrs.Mask, - Police: police, - AvRate: avrate, - Rtab: rtab, - Ptab: ptab, - }, nil + parent.AddRtAttr(ipType, ip) + parent.AddRtAttr(maskType, encodeMask) } -func (filter *Fw) Attrs() *FilterAttrs { - return &filter.FilterAttrs +func (filter *Flower) encode(parent *nl.RtAttr) error { + if filter.EthType != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_TYPE, htons(filter.EthType)) + } + if filter.SrcIP != nil { + filter.encodeIP(parent, filter.SrcIP, filter.SrcIPMask, + nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC, + nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK) + } + if filter.DestIP != nil { + filter.encodeIP(parent, filter.DestIP, filter.DestIPMask, + nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST, + nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK) + } + if filter.EncSrcIP != nil { + filter.encodeIP(parent, filter.EncSrcIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK) + } + if filter.EncDestIP != nil { + filter.encodeIP(parent, filter.EncDestIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK) + } + if filter.EncDestPort != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT, htons(filter.EncDestPort)) + } + if filter.EncKeyId != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId)) + } + + actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } + return nil } -func (filter *Fw) Type() string { - return "fw" +func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error { + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FLOWER_KEY_ETH_TYPE: + filter.EthType = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC: + filter.SrcIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK: + filter.SrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST: + filter.DestIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK: + filter.DestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC: + filter.EncSrcIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK: + filter.EncSrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST: + filter.EncDestIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK: + filter.EncDestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT: + filter.EncDestPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_ENC_KEY_ID: + filter.EncKeyId = ntohl(datum.Value) + case nl.TCA_FLOWER_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return err + } + filter.Actions, err = parseActions(tables) + if err != nil { + return err + } + } + } + return nil } // FilterDel will delete a filter from the system. @@ -90,7 +185,7 @@ func FilterDel(filter Filter) error { // FilterDel will delete a filter from the system. // Equivalent to: `tc filter del $filter` func (h *Handle) FilterDel(filter Filter) error { - req := h.newNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_DELTFILTER, unix.NLM_F_ACK) base := filter.Attrs() msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, @@ -101,7 +196,7 @@ func (h *Handle) FilterDel(filter Filter) error { } req.AddData(msg) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -114,8 +209,23 @@ func FilterAdd(filter Filter) error { // FilterAdd will add a filter to the system. // Equivalent to: `tc filter add $filter` func (h *Handle) FilterAdd(filter Filter) error { - native = nl.NativeEndian() - req := h.newNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + return h.filterModify(filter, unix.NLM_F_CREATE|unix.NLM_F_EXCL) +} + +// FilterReplace will replace a filter. +// Equivalent to: `tc filter replace $filter` +func FilterReplace(filter Filter) error { + return pkgHandle.FilterReplace(filter) +} + +// FilterReplace will replace a filter. +// Equivalent to: `tc filter replace $filter` +func (h *Handle) FilterReplace(filter Filter) error { + return h.filterModify(filter, unix.NLM_F_CREATE) +} + +func (h *Handle) filterModify(filter Filter, flags int) error { + req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK) base := filter.Attrs() msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, @@ -128,9 +238,10 @@ func (h *Handle) FilterAdd(filter Filter) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) - if u32, ok := filter.(*U32); ok { - // Convert TcU32Sel into nl.TcU32Sel as it is without copy. - sel := (*nl.TcU32Sel)(unsafe.Pointer(u32.Sel)) + + switch filter := filter.(type) { + case *U32: + sel := filter.Sel if sel == nil { // match all sel = &nl.TcU32Sel{ @@ -157,64 +268,81 @@ func (h *Handle) FilterAdd(filter Filter) error { } } sel.Nkeys = uint8(len(sel.Keys)) - nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize()) - if u32.ClassId != 0 { - nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(u32.ClassId)) + options.AddRtAttr(nl.TCA_U32_SEL, sel.Serialize()) + if filter.ClassId != 0 { + options.AddRtAttr(nl.TCA_U32_CLASSID, nl.Uint32Attr(filter.ClassId)) } - actionsAttr := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil) + if filter.Divisor != 0 { + if (filter.Divisor-1)&filter.Divisor != 0 { + return fmt.Errorf("illegal divisor %d. Must be a power of 2", filter.Divisor) + } + options.AddRtAttr(nl.TCA_U32_DIVISOR, nl.Uint32Attr(filter.Divisor)) + } + if filter.Hash != 0 { + options.AddRtAttr(nl.TCA_U32_HASH, nl.Uint32Attr(filter.Hash)) + } + if filter.Link != 0 { + options.AddRtAttr(nl.TCA_U32_LINK, nl.Uint32Attr(filter.Link)) + } + actionsAttr := options.AddRtAttr(nl.TCA_U32_ACT, nil) // backwards compatibility - if u32.RedirIndex != 0 { - u32.Actions = append([]Action{NewMirredAction(u32.RedirIndex)}, u32.Actions...) + if filter.RedirIndex != 0 { + filter.Actions = append([]Action{NewMirredAction(filter.RedirIndex)}, filter.Actions...) } - if err := EncodeActions(actionsAttr, u32.Actions); err != nil { + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { return err } - } else if fw, ok := filter.(*Fw); ok { - if fw.Mask != 0 { + case *FwFilter: + if filter.Mask != 0 { b := make([]byte, 4) - native.PutUint32(b, fw.Mask) - nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b) + native.PutUint32(b, filter.Mask) + options.AddRtAttr(nl.TCA_FW_MASK, b) } - if fw.InDev != "" { - nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev)) + if filter.InDev != "" { + options.AddRtAttr(nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev)) } - if (fw.Police != nl.TcPolice{}) { - - police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil) - nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize()) - if (fw.Police.Rate != nl.TcRateSpec{}) { - payload := SerializeRtab(fw.Rtab) - nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload) - } - if (fw.Police.PeakRate != nl.TcRateSpec{}) { - payload := SerializeRtab(fw.Ptab) - nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload) + if filter.Police != nil { + police := options.AddRtAttr(nl.TCA_FW_POLICE, nil) + if err := encodePolice(police, filter.Police); err != nil { + return err } } - if fw.ClassId != 0 { + if filter.ClassId != 0 { b := make([]byte, 4) - native.PutUint32(b, fw.ClassId) - nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b) + native.PutUint32(b, filter.ClassId) + options.AddRtAttr(nl.TCA_FW_CLASSID, b) } - } else if bpf, ok := filter.(*BpfFilter); ok { + case *BpfFilter: var bpfFlags uint32 - if bpf.ClassId != 0 { - nl.NewRtAttrChild(options, nl.TCA_BPF_CLASSID, nl.Uint32Attr(bpf.ClassId)) + if filter.ClassId != 0 { + options.AddRtAttr(nl.TCA_BPF_CLASSID, nl.Uint32Attr(filter.ClassId)) } - if bpf.Fd >= 0 { - nl.NewRtAttrChild(options, nl.TCA_BPF_FD, nl.Uint32Attr((uint32(bpf.Fd)))) + if filter.Fd >= 0 { + options.AddRtAttr(nl.TCA_BPF_FD, nl.Uint32Attr((uint32(filter.Fd)))) } - if bpf.Name != "" { - nl.NewRtAttrChild(options, nl.TCA_BPF_NAME, nl.ZeroTerminated(bpf.Name)) + if filter.Name != "" { + options.AddRtAttr(nl.TCA_BPF_NAME, nl.ZeroTerminated(filter.Name)) } - if bpf.DirectAction { + if filter.DirectAction { bpfFlags |= nl.TCA_BPF_FLAG_ACT_DIRECT } - nl.NewRtAttrChild(options, nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags)) + options.AddRtAttr(nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags)) + case *MatchAll: + actionsAttr := options.AddRtAttr(nl.TCA_MATCHALL_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } + if filter.ClassId != 0 { + options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId)) + } + case *Flower: + if err := filter.encode(options); err != nil { + return err + } } req.AddData(options) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -229,7 +357,7 @@ func FilterList(link Link, parent uint32) ([]Filter, error) { // Equivalent to: `tc filter show`. // Generally returns nothing if link and parent are not specified. func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { - req := h.newNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP) + req := h.newNetlinkRequest(unix.RTM_GETTFILTER, unix.NLM_F_DUMP) msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, Parent: parent, @@ -241,7 +369,7 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { } req.AddData(msg) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTFILTER) if err != nil { return nil, err } @@ -274,9 +402,13 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { case "u32": filter = &U32{} case "fw": - filter = &Fw{} + filter = &FwFilter{} case "bpf": filter = &BpfFilter{} + case "matchall": + filter = &MatchAll{} + case "flower": + filter = &Flower{} default: filter = &GenericFilter{FilterType: filterType} } @@ -301,6 +433,16 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { if err != nil { return nil, err } + case "matchall": + detailed, err = parseMatchAllData(filter, data) + if err != nil { + return nil, err + } + case "flower": + detailed, err = parseFlowerData(filter, data) + if err != nil { + return nil, err + } default: detailed = true } @@ -332,6 +474,53 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) { attrs.Bindcnt = int(tcgen.Bindcnt) } +func encodePolice(attr *nl.RtAttr, action *PoliceAction) error { + var rtab [256]uint32 + var ptab [256]uint32 + police := nl.TcPolice{} + police.Index = uint32(action.Attrs().Index) + police.Bindcnt = int32(action.Attrs().Bindcnt) + police.Capab = uint32(action.Attrs().Capab) + police.Refcnt = int32(action.Attrs().Refcnt) + police.Rate.Rate = action.Rate + police.PeakRate.Rate = action.PeakRate + police.Action = int32(action.ExceedAction) + + if police.Rate.Rate != 0 { + police.Rate.Mpu = action.Mpu + police.Rate.Overhead = action.Overhead + if CalcRtable(&police.Rate, rtab[:], action.RCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("TBF: failed to calculate rate table") + } + police.Burst = Xmittime(uint64(police.Rate.Rate), action.Burst) + } + + police.Mtu = action.Mtu + if police.PeakRate.Rate != 0 { + police.PeakRate.Mpu = action.Mpu + police.PeakRate.Overhead = action.Overhead + if CalcRtable(&police.PeakRate, ptab[:], action.PCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("POLICE: failed to calculate peak rate table") + } + } + + attr.AddRtAttr(nl.TCA_POLICE_TBF, police.Serialize()) + if police.Rate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RATE, SerializeRtab(rtab)) + } + if police.PeakRate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_PEAKRATE, SerializeRtab(ptab)) + } + if action.AvRate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_AVRATE, nl.Uint32Attr(action.AvRate)) + } + if action.NotExceedAction != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RESULT, nl.Uint32Attr(uint32(action.NotExceedAction))) + } + + return nil +} + func EncodeActions(attr *nl.RtAttr, actions []Action) error { tabIndex := int(nl.TCA_ACT_TAB) @@ -339,40 +528,141 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { switch action := action.(type) { default: return fmt.Errorf("unknown action type %s", action.Type()) + case *PoliceAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("police")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + if err := encodePolice(aopts, action); err != nil { + return err + } case *MirredAction: - table := nl.NewRtAttrChild(attr, tabIndex, nil) + table := attr.AddRtAttr(tabIndex, nil) tabIndex++ - nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("mirred")) - aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil) + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("mirred")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) mirred := nl.TcMirred{ Eaction: int32(action.MirredAction), Ifindex: uint32(action.Ifindex), } toTcGen(action.Attrs(), &mirred.TcGen) - nl.NewRtAttrChild(aopts, nl.TCA_MIRRED_PARMS, mirred.Serialize()) + aopts.AddRtAttr(nl.TCA_MIRRED_PARMS, mirred.Serialize()) + case *TunnelKeyAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("tunnel_key")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + tun := nl.TcTunnelKey{ + Action: int32(action.Action), + } + toTcGen(action.Attrs(), &tun.TcGen) + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_PARMS, tun.Serialize()) + if action.Action == TCA_TUNNEL_KEY_SET { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_KEY_ID, htonl(action.KeyID)) + if v4 := action.SrcAddr.To4(); v4 != nil { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC, v4[:]) + } else if v6 := action.SrcAddr.To16(); v6 != nil { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC, v6[:]) + } else { + return fmt.Errorf("invalid src addr %s for tunnel_key action", action.SrcAddr) + } + if v4 := action.DstAddr.To4(); v4 != nil { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV4_DST, v4[:]) + } else if v6 := action.DstAddr.To16(); v6 != nil { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_IPV6_DST, v6[:]) + } else { + return fmt.Errorf("invalid dst addr %s for tunnel_key action", action.DstAddr) + } + if action.DestPort != 0 { + aopts.AddRtAttr(nl.TCA_TUNNEL_KEY_ENC_DST_PORT, htons(action.DestPort)) + } + } + case *SkbEditAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("skbedit")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + skbedit := nl.TcSkbEdit{} + toTcGen(action.Attrs(), &skbedit.TcGen) + aopts.AddRtAttr(nl.TCA_SKBEDIT_PARMS, skbedit.Serialize()) + if action.QueueMapping != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_QUEUE_MAPPING, nl.Uint16Attr(*action.QueueMapping)) + } + if action.Priority != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_PRIORITY, nl.Uint32Attr(*action.Priority)) + } + if action.PType != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_PTYPE, nl.Uint16Attr(*action.PType)) + } + if action.Mark != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_MARK, nl.Uint32Attr(*action.Mark)) + } + case *ConnmarkAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("connmark")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + connmark := nl.TcConnmark{ + Zone: action.Zone, + } + toTcGen(action.Attrs(), &connmark.TcGen) + aopts.AddRtAttr(nl.TCA_CONNMARK_PARMS, connmark.Serialize()) + case *CsumAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("csum")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + csum := nl.TcCsum{ + UpdateFlags: uint32(action.UpdateFlags), + } + toTcGen(action.Attrs(), &csum.TcGen) + aopts.AddRtAttr(nl.TCA_CSUM_PARMS, csum.Serialize()) case *BpfAction: - table := nl.NewRtAttrChild(attr, tabIndex, nil) + table := attr.AddRtAttr(tabIndex, nil) tabIndex++ - nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("bpf")) - aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil) + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("bpf")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) gen := nl.TcGen{} toTcGen(action.Attrs(), &gen) - nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_PARMS, gen.Serialize()) - nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_FD, nl.Uint32Attr(uint32(action.Fd))) - nl.NewRtAttrChild(aopts, nl.TCA_ACT_BPF_NAME, nl.ZeroTerminated(action.Name)) + aopts.AddRtAttr(nl.TCA_ACT_BPF_PARMS, gen.Serialize()) + aopts.AddRtAttr(nl.TCA_ACT_BPF_FD, nl.Uint32Attr(uint32(action.Fd))) + aopts.AddRtAttr(nl.TCA_ACT_BPF_NAME, nl.ZeroTerminated(action.Name)) case *GenericAction: - table := nl.NewRtAttrChild(attr, tabIndex, nil) + table := attr.AddRtAttr(tabIndex, nil) tabIndex++ - nl.NewRtAttrChild(table, nl.TCA_ACT_KIND, nl.ZeroTerminated("gact")) - aopts := nl.NewRtAttrChild(table, nl.TCA_ACT_OPTIONS, nil) + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("gact")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) gen := nl.TcGen{} toTcGen(action.Attrs(), &gen) - nl.NewRtAttrChild(aopts, nl.TCA_GACT_PARMS, gen.Serialize()) + aopts.AddRtAttr(nl.TCA_GACT_PARMS, gen.Serialize()) } } return nil } +func parsePolice(data syscall.NetlinkRouteAttr, police *PoliceAction) { + switch data.Attr.Type { + case nl.TCA_POLICE_RESULT: + police.NotExceedAction = TcPolAct(native.Uint32(data.Value[0:4])) + case nl.TCA_POLICE_AVRATE: + police.AvRate = native.Uint32(data.Value[0:4]) + case nl.TCA_POLICE_TBF: + p := *nl.DeserializeTcPolice(data.Value) + police.ActionAttrs = ActionAttrs{} + police.Attrs().Index = int(p.Index) + police.Attrs().Bindcnt = int(p.Bindcnt) + police.Attrs().Capab = int(p.Capab) + police.Attrs().Refcnt = int(p.Refcnt) + police.ExceedAction = TcPolAct(p.Action) + police.Rate = p.Rate.Rate + police.PeakRate = p.PeakRate.Rate + police.Burst = Xmitsize(uint64(p.Rate.Rate), p.Burst) + police.Mtu = p.Mtu + police.LinkLayer = int(p.Rate.Linklayer) & nl.TC_LINKLAYER_MASK + police.Overhead = p.Rate.Overhead + } +} + func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { var actions []Action for _, table := range tables { @@ -393,8 +683,18 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action = &MirredAction{} case "bpf": action = &BpfAction{} + case "connmark": + action = &ConnmarkAction{} + case "csum": + action = &CsumAction{} case "gact": action = &GenericAction{} + case "tunnel_key": + action = &TunnelKeyAction{} + case "skbedit": + action = &SkbEditAction{} + case "police": + action = &PoliceAction{} default: break nextattr } @@ -409,11 +709,46 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { switch adatum.Attr.Type { case nl.TCA_MIRRED_PARMS: mirred := *nl.DeserializeTcMirred(adatum.Value) - toAttrs(&mirred.TcGen, action.Attrs()) action.(*MirredAction).ActionAttrs = ActionAttrs{} + toAttrs(&mirred.TcGen, action.Attrs()) action.(*MirredAction).Ifindex = int(mirred.Ifindex) action.(*MirredAction).MirredAction = MirredAct(mirred.Eaction) } + case "tunnel_key": + switch adatum.Attr.Type { + case nl.TCA_TUNNEL_KEY_PARMS: + tun := *nl.DeserializeTunnelKey(adatum.Value) + action.(*TunnelKeyAction).ActionAttrs = ActionAttrs{} + toAttrs(&tun.TcGen, action.Attrs()) + action.(*TunnelKeyAction).Action = TunnelKeyAct(tun.Action) + case nl.TCA_TUNNEL_KEY_ENC_KEY_ID: + action.(*TunnelKeyAction).KeyID = networkOrder.Uint32(adatum.Value[0:4]) + case nl.TCA_TUNNEL_KEY_ENC_IPV6_SRC, nl.TCA_TUNNEL_KEY_ENC_IPV4_SRC: + action.(*TunnelKeyAction).SrcAddr = adatum.Value[:] + case nl.TCA_TUNNEL_KEY_ENC_IPV6_DST, nl.TCA_TUNNEL_KEY_ENC_IPV4_DST: + action.(*TunnelKeyAction).DstAddr = adatum.Value[:] + case nl.TCA_TUNNEL_KEY_ENC_DST_PORT: + action.(*TunnelKeyAction).DestPort = ntohs(adatum.Value) + } + case "skbedit": + switch adatum.Attr.Type { + case nl.TCA_SKBEDIT_PARMS: + skbedit := *nl.DeserializeSkbEdit(adatum.Value) + action.(*SkbEditAction).ActionAttrs = ActionAttrs{} + toAttrs(&skbedit.TcGen, action.Attrs()) + case nl.TCA_SKBEDIT_MARK: + mark := native.Uint32(adatum.Value[0:4]) + action.(*SkbEditAction).Mark = &mark + case nl.TCA_SKBEDIT_PRIORITY: + priority := native.Uint32(adatum.Value[0:4]) + action.(*SkbEditAction).Priority = &priority + case nl.TCA_SKBEDIT_PTYPE: + ptype := native.Uint16(adatum.Value[0:2]) + action.(*SkbEditAction).PType = &ptype + case nl.TCA_SKBEDIT_QUEUE_MAPPING: + mapping := native.Uint16(adatum.Value[0:2]) + action.(*SkbEditAction).QueueMapping = &mapping + } case "bpf": switch adatum.Attr.Type { case nl.TCA_ACT_BPF_PARMS: @@ -424,12 +759,30 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { case nl.TCA_ACT_BPF_NAME: action.(*BpfAction).Name = string(adatum.Value[:len(adatum.Value)-1]) } + case "connmark": + switch adatum.Attr.Type { + case nl.TCA_CONNMARK_PARMS: + connmark := *nl.DeserializeTcConnmark(adatum.Value) + action.(*ConnmarkAction).ActionAttrs = ActionAttrs{} + toAttrs(&connmark.TcGen, action.Attrs()) + action.(*ConnmarkAction).Zone = connmark.Zone + } + case "csum": + switch adatum.Attr.Type { + case nl.TCA_CSUM_PARMS: + csum := *nl.DeserializeTcCsum(adatum.Value) + action.(*CsumAction).ActionAttrs = ActionAttrs{} + toAttrs(&csum.TcGen, action.Attrs()) + action.(*CsumAction).UpdateFlags = CsumUpdateFlags(csum.UpdateFlags) + } case "gact": switch adatum.Attr.Type { case nl.TCA_GACT_PARMS: gen := *nl.DeserializeTcGen(adatum.Value) toAttrs(&gen, action.Attrs()) } + case "police": + parsePolice(adatum, action.(*PoliceAction)) } } } @@ -440,7 +793,6 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { } func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() u32 := filter.(*U32) detailed := false for _, datum := range data { @@ -448,7 +800,7 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) case nl.TCA_U32_SEL: detailed = true sel := nl.DeserializeTcU32Sel(datum.Value) - u32.Sel = (*TcU32Sel)(unsafe.Pointer(sel)) + u32.Sel = sel if native != networkOrder { // Handle the endianness of attributes u32.Sel.Offmask = native.Uint16(htons(sel.Offmask)) @@ -474,14 +826,19 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) } case nl.TCA_U32_CLASSID: u32.ClassId = native.Uint32(datum.Value) + case nl.TCA_U32_DIVISOR: + u32.Divisor = native.Uint32(datum.Value) + case nl.TCA_U32_HASH: + u32.Hash = native.Uint32(datum.Value) + case nl.TCA_U32_LINK: + u32.Link = native.Uint32(datum.Value) } } return detailed, nil } func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() - fw := filter.(*Fw) + fw := filter.(*FwFilter) detailed := true for _, datum := range data { switch datum.Attr.Type { @@ -492,24 +849,18 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { case nl.TCA_FW_INDEV: fw.InDev = string(datum.Value[:len(datum.Value)-1]) case nl.TCA_FW_POLICE: + var police PoliceAction adata, _ := nl.ParseRouteAttr(datum.Value) for _, aattr := range adata { - switch aattr.Attr.Type { - case nl.TCA_POLICE_TBF: - fw.Police = *nl.DeserializeTcPolice(aattr.Value) - case nl.TCA_POLICE_RATE: - fw.Rtab = DeserializeRtab(aattr.Value) - case nl.TCA_POLICE_PEAKRATE: - fw.Ptab = DeserializeRtab(aattr.Value) - } + parsePolice(aattr, &police) } + fw.Police = &police } } return detailed, nil } func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - native = nl.NativeEndian() bpf := filter.(*BpfFilter) detailed := true for _, datum := range data { @@ -525,11 +876,40 @@ func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) if (flags & nl.TCA_BPF_FLAG_ACT_DIRECT) != 0 { bpf.DirectAction = true } + case nl.TCA_BPF_ID: + bpf.Id = int(native.Uint32(datum.Value[0:4])) + case nl.TCA_BPF_TAG: + bpf.Tag = hex.EncodeToString(datum.Value) } } return detailed, nil } +func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + matchall := filter.(*MatchAll) + detailed := true + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_MATCHALL_CLASSID: + matchall.ClassId = native.Uint32(datum.Value[0:4]) + case nl.TCA_MATCHALL_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + matchall.Actions, err = parseActions(tables) + if err != nil { + return detailed, err + } + } + } + return detailed, nil +} + +func parseFlowerData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + return true, filter.(*Flower).decode(data) +} + func AlignToAtm(size uint) uint { var linksize, cells int cells = int(size / nl.ATM_CELL_PAYLOAD) @@ -552,7 +932,7 @@ func AdjustSize(sz uint, mpu uint, linklayer int) uint { } } -func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cellLog int, mtu uint32, linklayer int) int { +func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, linklayer int) int { bps := rate.Rate mpu := rate.Mpu var sz uint @@ -567,7 +947,7 @@ func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cellLog int, mtu uint32, } for i := 0; i < 256; i++ { sz = AdjustSize(uint((i+1)<= nl.IPSET_ERR_PRIVATE { + err = nl.IPSetError(uintptr(errno)) + } + } + return +} + +func ipsetUnserialize(msgs [][]byte) (result IPSetResult) { + for _, msg := range msgs { + result.unserialize(msg) + } + return result +} + +func (result *IPSetResult) unserialize(msg []byte) { + result.Nfgenmsg = nl.DeserializeNfgenmsg(msg) + + for attr := range nl.ParseAttributes(msg[4:]) { + switch attr.Type { + case nl.IPSET_ATTR_PROTOCOL: + result.Protocol = attr.Value[0] + case nl.IPSET_ATTR_SETNAME: + result.SetName = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_COMMENT: + result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_TYPENAME: + result.TypeName = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_REVISION: + result.Revision = attr.Value[0] + case nl.IPSET_ATTR_FAMILY: + result.Family = attr.Value[0] + case nl.IPSET_ATTR_FLAGS: + result.Flags = attr.Value[0] + case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: + result.parseAttrData(attr.Value) + case nl.IPSET_ATTR_ADT | nl.NLA_F_NESTED: + result.parseAttrADT(attr.Value) + case nl.IPSET_ATTR_PROTOCOL_MIN: + result.ProtocolMinVersion = attr.Value[0] + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() + default: + log.Printf("unknown ipset attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func (result *IPSetResult) parseAttrData(data []byte) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_HASHSIZE | nl.NLA_F_NET_BYTEORDER: + result.HashSize = attr.Uint32() + case nl.IPSET_ATTR_MAXELEM | nl.NLA_F_NET_BYTEORDER: + result.MaxElements = attr.Uint32() + case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + result.Timeout = &val + case nl.IPSET_ATTR_ELEMENTS | nl.NLA_F_NET_BYTEORDER: + result.NumEntries = attr.Uint32() + case nl.IPSET_ATTR_REFERENCES | nl.NLA_F_NET_BYTEORDER: + result.References = attr.Uint32() + case nl.IPSET_ATTR_MEMSIZE | nl.NLA_F_NET_BYTEORDER: + result.SizeInMemory = attr.Uint32() + case nl.IPSET_ATTR_CADT_FLAGS | nl.NLA_F_NET_BYTEORDER: + result.CadtFlags = attr.Uint32() + case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP | nl.NLA_F_NET_BYTEORDER: + result.Entries = append(result.Entries, IPSetEntry{IP: nested.Value}) + case nl.IPSET_ATTR_IP: + result.IPFrom = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_IP_TO | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP: + result.IPTo = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_PORT_FROM | nl.NLA_F_NET_BYTEORDER: + result.PortFrom = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_PORT_TO | nl.NLA_F_NET_BYTEORDER: + result.PortTo = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_CADT_LINENO | nl.NLA_F_NET_BYTEORDER: + result.LineNo = attr.Uint32() + case nl.IPSET_ATTR_COMMENT: + result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() + default: + log.Printf("unknown ipset data attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func (result *IPSetResult) parseAttrADT(data []byte) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_DATA | nl.NLA_F_NESTED: + result.Entries = append(result.Entries, parseIPSetEntry(attr.Value)) + default: + log.Printf("unknown ADT attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) + } + } +} + +func parseIPSetEntry(data []byte) (entry IPSetEntry) { + for attr := range nl.ParseAttributes(data) { + switch attr.Type { + case nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Timeout = &val + case nl.IPSET_ATTR_BYTES | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint64() + entry.Bytes = &val + case nl.IPSET_ATTR_PACKETS | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint64() + entry.Packets = &val + case nl.IPSET_ATTR_ETHER: + entry.MAC = net.HardwareAddr(attr.Value) + case nl.IPSET_ATTR_IP: + entry.IP = net.IP(attr.Value) + case nl.IPSET_ATTR_COMMENT: + entry.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IP: + entry.IP = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IP: + entry.IP2 = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_CIDR: + entry.CIDR = attr.Value[0] + case nl.IPSET_ATTR_CIDR2: + entry.CIDR2 = attr.Value[0] + case nl.IPSET_ATTR_PORT | nl.NLA_F_NET_BYTEORDER: + val := networkOrder.Uint16(attr.Value) + entry.Port = &val + case nl.IPSET_ATTR_PROTO: + val := attr.Value[0] + entry.Protocol = &val + case nl.IPSET_ATTR_IFACE: + entry.IFace = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Mark = &val + default: + log.Printf("unknown ADT attribute from kernel: %+v", attr) + } + } + return +} diff --git a/vendor/github.com/vishvananda/netlink/link.go b/vendor/github.com/vishvananda/netlink/link.go index 4e77037b5c..33c872336d 100644 --- a/vendor/github.com/vishvananda/netlink/link.go +++ b/vendor/github.com/vishvananda/netlink/link.go @@ -3,6 +3,8 @@ package netlink import ( "fmt" "net" + "os" + "strconv" ) // Link represents a link device from netlink. Shared link attributes @@ -33,10 +35,50 @@ type LinkAttrs struct { Alias string Statistics *LinkStatistics Promisc int + Allmulti int + Multi int Xdp *LinkXdp EncapType string Protinfo *Protinfo OperState LinkOperState + PhysSwitchID int + NetNsID int + NumTxQueues int + NumRxQueues int + GSOMaxSize uint32 + GSOMaxSegs uint32 + Vfs []VfInfo // virtual functions available on link + Group uint32 + Slave LinkSlave +} + +// LinkSlave represents a slave device. +type LinkSlave interface { + SlaveType() string +} + +// VfInfo represents configuration of virtual function +type VfInfo struct { + ID int + Mac net.HardwareAddr + Vlan int + Qos int + TxRate int // IFLA_VF_TX_RATE Max TxRate + Spoofchk bool + LinkState uint32 + MaxTxRate uint32 // IFLA_VF_RATE Max TxRate + MinTxRate uint32 // IFLA_VF_RATE Min TxRate + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + Multicast uint64 + Broadcast uint64 + RxDropped uint64 + TxDropped uint64 + + RssQuery uint32 + Trust uint32 } // LinkOperState represents the values of the IFLA_OPERSTATE link @@ -75,7 +117,8 @@ func (s LinkOperState) String() string { // NewLinkAttrs returns LinkAttrs structure filled with default values func NewLinkAttrs() LinkAttrs { return LinkAttrs{ - TxQLen: -1, + NetNsID: -1, + TxQLen: -1, } } @@ -168,10 +211,11 @@ type LinkStatistics64 struct { } type LinkXdp struct { - Fd int - Attached bool - Flags uint32 - ProgId uint32 + Fd int + Attached bool + AttachMode uint32 + Flags uint32 + ProgId uint32 } // Device links cannot be created via netlink. These links @@ -218,7 +262,9 @@ func (ifb *Ifb) Type() string { type Bridge struct { LinkAttrs MulticastSnooping *bool + AgeingTime *uint32 HelloTime *uint32 + VlanFiltering *bool } func (bridge *Bridge) Attrs() *LinkAttrs { @@ -232,7 +278,8 @@ func (bridge *Bridge) Type() string { // Vlan links have ParentIndex set in their Attrs() type Vlan struct { LinkAttrs - VlanId int + VlanId int + VlanProtocol VlanProtocol } func (vlan *Vlan) Attrs() *LinkAttrs { @@ -258,6 +305,9 @@ const ( type Macvlan struct { LinkAttrs Mode MacvlanMode + + // MACAddrs is only populated for Macvlan SOURCE links + MACAddrs []net.HardwareAddr } func (macvlan *Macvlan) Attrs() *LinkAttrs { @@ -283,8 +333,13 @@ type TuntapFlag uint16 // Tuntap links created via /dev/tun/tap, but can be destroyed via netlink type Tuntap struct { LinkAttrs - Mode TuntapMode - Flags TuntapFlag + Mode TuntapMode + Flags TuntapFlag + NonPersist bool + Queues int + Fds []*os.File + Owner uint32 + Group uint32 } func (tuntap *Tuntap) Attrs() *LinkAttrs { @@ -298,7 +353,9 @@ func (tuntap *Tuntap) Type() string { // Veth devices must specify PeerName on create type Veth struct { LinkAttrs - PeerName string // veth on create only + PeerName string // veth on create only + PeerHardwareAddr net.HardwareAddr + PeerNamespace interface{} } func (veth *Veth) Attrs() *LinkAttrs { @@ -309,6 +366,19 @@ func (veth *Veth) Type() string { return "veth" } +// Wireguard represent links of type "wireguard", see https://www.wireguard.com/ +type Wireguard struct { + LinkAttrs +} + +func (wg *Wireguard) Attrs() *LinkAttrs { + return &wg.LinkAttrs +} + +func (wg *Wireguard) Type() string { + return "wireguard" +} + // GenericLink links represent types that are not currently understood // by this netlink library. type GenericLink struct { @@ -326,26 +396,28 @@ func (generic *GenericLink) Type() string { type Vxlan struct { LinkAttrs - VxlanId int - VtepDevIndex int - SrcAddr net.IP - Group net.IP - TTL int - TOS int - Learning bool - Proxy bool - RSC bool - L2miss bool - L3miss bool - UDPCSum bool - NoAge bool - GBP bool - FlowBased bool - Age int - Limit int - Port int - PortLow int - PortHigh int + VxlanId int + VtepDevIndex int + SrcAddr net.IP + Group net.IP + TTL int + TOS int + Learning bool + Proxy bool + RSC bool + L2miss bool + L3miss bool + UDPCSum bool + UDP6ZeroCSumTx bool + UDP6ZeroCSumRx bool + NoAge bool + GBP bool + FlowBased bool + Age int + Limit int + Port int + PortLow int + PortHigh int } func (vxlan *Vxlan) Attrs() *LinkAttrs { @@ -365,9 +437,18 @@ const ( IPVLAN_MODE_MAX ) +type IPVlanFlag uint16 + +const ( + IPVLAN_FLAG_BRIDGE IPVlanFlag = iota + IPVLAN_FLAG_PRIVATE + IPVLAN_FLAG_VEPA +) + type IPVlan struct { LinkAttrs Mode IPVlanMode + Flag IPVlanFlag } func (ipvlan *IPVlan) Attrs() *LinkAttrs { @@ -378,6 +459,56 @@ func (ipvlan *IPVlan) Type() string { return "ipvlan" } +// IPVtap - IPVtap is a virtual interfaces based on ipvlan +type IPVtap struct { + IPVlan +} + +func (ipvtap *IPVtap) Attrs() *LinkAttrs { + return &ipvtap.LinkAttrs +} + +func (ipvtap IPVtap) Type() string { + return "ipvtap" +} + +// VlanProtocol type +type VlanProtocol int + +func (p VlanProtocol) String() string { + s, ok := VlanProtocolToString[p] + if !ok { + return fmt.Sprintf("VlanProtocol(%d)", p) + } + return s +} + +// StringToVlanProtocol returns vlan protocol, or unknown is the s is invalid. +func StringToVlanProtocol(s string) VlanProtocol { + mode, ok := StringToVlanProtocolMap[s] + if !ok { + return VLAN_PROTOCOL_UNKNOWN + } + return mode +} + +// VlanProtocol possible values +const ( + VLAN_PROTOCOL_UNKNOWN VlanProtocol = 0 + VLAN_PROTOCOL_8021Q VlanProtocol = 0x8100 + VLAN_PROTOCOL_8021AD VlanProtocol = 0x88A8 +) + +var VlanProtocolToString = map[VlanProtocol]string{ + VLAN_PROTOCOL_8021Q: "802.1q", + VLAN_PROTOCOL_8021AD: "802.1ad", +} + +var StringToVlanProtocolMap = map[string]VlanProtocol{ + "802.1q": VLAN_PROTOCOL_8021Q, + "802.1ad": VLAN_PROTOCOL_8021AD, +} + // BondMode type type BondMode int @@ -389,7 +520,7 @@ func (b BondMode) String() string { return s } -// StringToBondMode returns bond mode, or uknonw is the s is invalid. +// StringToBondMode returns bond mode, or unknown is the s is invalid. func StringToBondMode(s string) BondMode { mode, ok := StringToBondModeMap[s] if !ok { @@ -440,6 +571,27 @@ const ( BOND_ARP_VALIDATE_ALL ) +var bondArpValidateToString = map[BondArpValidate]string{ + BOND_ARP_VALIDATE_NONE: "none", + BOND_ARP_VALIDATE_ACTIVE: "active", + BOND_ARP_VALIDATE_BACKUP: "backup", + BOND_ARP_VALIDATE_ALL: "none", +} +var StringToBondArpValidateMap = map[string]BondArpValidate{ + "none": BOND_ARP_VALIDATE_NONE, + "active": BOND_ARP_VALIDATE_ACTIVE, + "backup": BOND_ARP_VALIDATE_BACKUP, + "all": BOND_ARP_VALIDATE_ALL, +} + +func (b BondArpValidate) String() string { + s, ok := bondArpValidateToString[b] + if !ok { + return fmt.Sprintf("BondArpValidate(%d)", b) + } + return s +} + // BondPrimaryReselect type type BondPrimaryReselect int @@ -450,6 +602,25 @@ const ( BOND_PRIMARY_RESELECT_FAILURE ) +var bondPrimaryReselectToString = map[BondPrimaryReselect]string{ + BOND_PRIMARY_RESELECT_ALWAYS: "always", + BOND_PRIMARY_RESELECT_BETTER: "better", + BOND_PRIMARY_RESELECT_FAILURE: "failure", +} +var StringToBondPrimaryReselectMap = map[string]BondPrimaryReselect{ + "always": BOND_PRIMARY_RESELECT_ALWAYS, + "better": BOND_PRIMARY_RESELECT_BETTER, + "failure": BOND_PRIMARY_RESELECT_FAILURE, +} + +func (b BondPrimaryReselect) String() string { + s, ok := bondPrimaryReselectToString[b] + if !ok { + return fmt.Sprintf("BondPrimaryReselect(%d)", b) + } + return s +} + // BondArpAllTargets type type BondArpAllTargets int @@ -459,6 +630,23 @@ const ( BOND_ARP_ALL_TARGETS_ALL ) +var bondArpAllTargetsToString = map[BondArpAllTargets]string{ + BOND_ARP_ALL_TARGETS_ANY: "any", + BOND_ARP_ALL_TARGETS_ALL: "all", +} +var StringToBondArpAllTargetsMap = map[string]BondArpAllTargets{ + "any": BOND_ARP_ALL_TARGETS_ANY, + "all": BOND_ARP_ALL_TARGETS_ALL, +} + +func (b BondArpAllTargets) String() string { + s, ok := bondArpAllTargetsToString[b] + if !ok { + return fmt.Sprintf("BondArpAllTargets(%d)", b) + } + return s +} + // BondFailOverMac type type BondFailOverMac int @@ -469,6 +657,25 @@ const ( BOND_FAIL_OVER_MAC_FOLLOW ) +var bondFailOverMacToString = map[BondFailOverMac]string{ + BOND_FAIL_OVER_MAC_NONE: "none", + BOND_FAIL_OVER_MAC_ACTIVE: "active", + BOND_FAIL_OVER_MAC_FOLLOW: "follow", +} +var StringToBondFailOverMacMap = map[string]BondFailOverMac{ + "none": BOND_FAIL_OVER_MAC_NONE, + "active": BOND_FAIL_OVER_MAC_ACTIVE, + "follow": BOND_FAIL_OVER_MAC_FOLLOW, +} + +func (b BondFailOverMac) String() string { + s, ok := bondFailOverMacToString[b] + if !ok { + return fmt.Sprintf("BondFailOverMac(%d)", b) + } + return s +} + // BondXmitHashPolicy type type BondXmitHashPolicy int @@ -480,7 +687,7 @@ func (b BondXmitHashPolicy) String() string { return s } -// StringToBondXmitHashPolicy returns bond lacp arte, or uknonw is the s is invalid. +// StringToBondXmitHashPolicy returns bond lacp arte, or unknown is the s is invalid. func StringToBondXmitHashPolicy(s string) BondXmitHashPolicy { lacp, ok := StringToBondXmitHashPolicyMap[s] if !ok { @@ -525,7 +732,7 @@ func (b BondLacpRate) String() string { return s } -// StringToBondLacpRate returns bond lacp arte, or uknonw is the s is invalid. +// StringToBondLacpRate returns bond lacp arte, or unknown is the s is invalid. func StringToBondLacpRate(s string) BondLacpRate { lacp, ok := StringToBondLacpRateMap[s] if !ok { @@ -560,6 +767,25 @@ const ( BOND_AD_SELECT_COUNT ) +var bondAdSelectToString = map[BondAdSelect]string{ + BOND_AD_SELECT_STABLE: "stable", + BOND_AD_SELECT_BANDWIDTH: "bandwidth", + BOND_AD_SELECT_COUNT: "count", +} +var StringToBondAdSelectMap = map[string]BondAdSelect{ + "stable": BOND_AD_SELECT_STABLE, + "bandwidth": BOND_AD_SELECT_BANDWIDTH, + "count": BOND_AD_SELECT_COUNT, +} + +func (b BondAdSelect) String() string { + s, ok := bondAdSelectToString[b] + if !ok { + return fmt.Sprintf("BondAdSelect(%d)", b) + } + return s +} + // BondAdInfo represents ad info for bond type BondAdInfo struct { AggregatorId int @@ -591,7 +817,7 @@ type Bond struct { AllSlavesActive int MinLinks int LpInterval int - PackersPerSlave int + PacketsPerSlave int LacpRate BondLacpRate AdSelect BondAdSelect // looking at iproute tool AdInfo can only be retrived. It can't be set. @@ -624,7 +850,7 @@ func NewLinkBond(atr LinkAttrs) *Bond { AllSlavesActive: -1, MinLinks: -1, LpInterval: -1, - PackersPerSlave: -1, + PacketsPerSlave: -1, LacpRate: -1, AdSelect: -1, AdActorSysPrio: -1, @@ -669,6 +895,105 @@ func (bond *Bond) Type() string { return "bond" } +// BondSlaveState represents the values of the IFLA_BOND_SLAVE_STATE bond slave +// attribute, which contains the state of the bond slave. +type BondSlaveState uint8 + +const ( + //BondStateActive Link is active. + BondStateActive BondSlaveState = iota + //BondStateBackup Link is backup. + BondStateBackup +) + +func (s BondSlaveState) String() string { + switch s { + case BondStateActive: + return "ACTIVE" + case BondStateBackup: + return "BACKUP" + default: + return strconv.Itoa(int(s)) + } +} + +// BondSlaveMiiStatus represents the values of the IFLA_BOND_SLAVE_MII_STATUS bond slave +// attribute, which contains the status of MII link monitoring +type BondSlaveMiiStatus uint8 + +const ( + //BondLinkUp link is up and running. + BondLinkUp BondSlaveMiiStatus = iota + //BondLinkFail link has just gone down. + BondLinkFail + //BondLinkDown link has been down for too long time. + BondLinkDown + //BondLinkBack link is going back. + BondLinkBack +) + +func (s BondSlaveMiiStatus) String() string { + switch s { + case BondLinkUp: + return "UP" + case BondLinkFail: + return "GOING_DOWN" + case BondLinkDown: + return "DOWN" + case BondLinkBack: + return "GOING_BACK" + default: + return strconv.Itoa(int(s)) + } +} + +type BondSlave struct { + State BondSlaveState + MiiStatus BondSlaveMiiStatus + LinkFailureCount uint32 + PermHardwareAddr net.HardwareAddr + QueueId uint16 + AggregatorId uint16 + AdActorOperPortState uint8 + AdPartnerOperPortState uint16 +} + +func (b *BondSlave) SlaveType() string { + return "bond" +} + +type VrfSlave struct { + Table uint32 +} + +func (v *VrfSlave) SlaveType() string { + return "vrf" +} + +// Geneve devices must specify RemoteIP and ID (VNI) on create +// https://github.com/torvalds/linux/blob/47ec5303d73ea344e84f46660fff693c57641386/drivers/net/geneve.c#L1209-L1223 +type Geneve struct { + LinkAttrs + ID uint32 // vni + Remote net.IP + Ttl uint8 + Tos uint8 + Dport uint16 + UdpCsum uint8 + UdpZeroCsum6Tx uint8 + UdpZeroCsum6Rx uint8 + Link uint32 + FlowBased bool +} + +func (geneve *Geneve) Attrs() *LinkAttrs { + return &geneve.LinkAttrs +} + +func (geneve *Geneve) Type() string { + return "geneve" +} + // Gretap devices must specify LocalIP and RemoteIP on create type Gretap struct { LinkAttrs @@ -694,17 +1019,26 @@ func (gretap *Gretap) Attrs() *LinkAttrs { } func (gretap *Gretap) Type() string { + if gretap.Local.To4() == nil { + return "ip6gretap" + } return "gretap" } type Iptun struct { LinkAttrs - Ttl uint8 - Tos uint8 - PMtuDisc uint8 - Link uint32 - Local net.IP - Remote net.IP + Ttl uint8 + Tos uint8 + PMtuDisc uint8 + Link uint32 + Local net.IP + Remote net.IP + EncapSport uint16 + EncapDport uint16 + EncapType uint16 + EncapFlags uint16 + FlowBased bool + Proto uint8 } func (iptun *Iptun) Attrs() *LinkAttrs { @@ -715,6 +1049,86 @@ func (iptun *Iptun) Type() string { return "ipip" } +type Ip6tnl struct { + LinkAttrs + Link uint32 + Local net.IP + Remote net.IP + Ttl uint8 + Tos uint8 + Flags uint32 + Proto uint8 + FlowInfo uint32 + EncapLimit uint8 + EncapType uint16 + EncapFlags uint16 + EncapSport uint16 + EncapDport uint16 +} + +func (ip6tnl *Ip6tnl) Attrs() *LinkAttrs { + return &ip6tnl.LinkAttrs +} + +func (ip6tnl *Ip6tnl) Type() string { + return "ip6tnl" +} + +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L84 +type TunnelEncapType uint16 + +const ( + None TunnelEncapType = iota + FOU + GUE +) + +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L91 +type TunnelEncapFlag uint16 + +const ( + CSum TunnelEncapFlag = 1 << 0 + CSum6 = 1 << 1 + RemCSum = 1 << 2 +) + +// from https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/ip6_tunnel.h#L12 +type IP6TunnelFlag uint16 + +const ( + IP6_TNL_F_IGN_ENCAP_LIMIT IP6TunnelFlag = 1 // don't add encapsulation limit if one isn't present in inner packet + IP6_TNL_F_USE_ORIG_TCLASS = 2 // copy the traffic class field from the inner packet + IP6_TNL_F_USE_ORIG_FLOWLABEL = 4 // copy the flowlabel from the inner packet + IP6_TNL_F_MIP6_DEV = 8 // being used for Mobile IPv6 + IP6_TNL_F_RCV_DSCP_COPY = 10 // copy DSCP from the outer packet + IP6_TNL_F_USE_ORIG_FWMARK = 20 // copy fwmark from inner packet + IP6_TNL_F_ALLOW_LOCAL_REMOTE = 40 // allow remote endpoint on the local node +) + +type Sittun struct { + LinkAttrs + Link uint32 + Ttl uint8 + Tos uint8 + PMtuDisc uint8 + Proto uint8 + Local net.IP + Remote net.IP + EncapLimit uint8 + EncapType uint16 + EncapFlags uint16 + EncapSport uint16 + EncapDport uint16 +} + +func (sittun *Sittun) Attrs() *LinkAttrs { + return &sittun.LinkAttrs +} + +func (sittun *Sittun) Type() string { + return "sit" +} + type Vti struct { LinkAttrs IKey uint32 @@ -728,10 +1142,42 @@ func (vti *Vti) Attrs() *LinkAttrs { return &vti.LinkAttrs } -func (iptun *Vti) Type() string { +func (vti *Vti) Type() string { + if vti.Local.To4() == nil { + return "vti6" + } return "vti" } +type Gretun struct { + LinkAttrs + Link uint32 + IFlags uint16 + OFlags uint16 + IKey uint32 + OKey uint32 + Local net.IP + Remote net.IP + Ttl uint8 + Tos uint8 + PMtuDisc uint8 + EncapType uint16 + EncapFlags uint16 + EncapSport uint16 + EncapDport uint16 +} + +func (gretun *Gretun) Attrs() *LinkAttrs { + return &gretun.LinkAttrs +} + +func (gretun *Gretun) Type() string { + if gretun.Local.To4() == nil { + return "ip6gre" + } + return "gre" +} + type Vrf struct { LinkAttrs Table uint32 @@ -761,11 +1207,136 @@ func (gtp *GTP) Type() string { return "gtp" } +// Virtual XFRM Interfaces +// Named "xfrmi" to prevent confusion with XFRM objects +type Xfrmi struct { + LinkAttrs + Ifid uint32 +} + +func (xfrm *Xfrmi) Attrs() *LinkAttrs { + return &xfrm.LinkAttrs +} + +func (xfrm *Xfrmi) Type() string { + return "xfrm" +} + +// IPoIB interface + +type IPoIBMode uint16 + +func (m *IPoIBMode) String() string { + str, ok := iPoIBModeToString[*m] + if !ok { + return fmt.Sprintf("mode(%d)", *m) + } + return str +} + +const ( + IPOIB_MODE_DATAGRAM = iota + IPOIB_MODE_CONNECTED +) + +var iPoIBModeToString = map[IPoIBMode]string{ + IPOIB_MODE_DATAGRAM: "datagram", + IPOIB_MODE_CONNECTED: "connected", +} + +var StringToIPoIBMode = map[string]IPoIBMode{ + "datagram": IPOIB_MODE_DATAGRAM, + "connected": IPOIB_MODE_CONNECTED, +} + +const ( + CAN_STATE_ERROR_ACTIVE = iota + CAN_STATE_ERROR_WARNING + CAN_STATE_ERROR_PASSIVE + CAN_STATE_BUS_OFF + CAN_STATE_STOPPED + CAN_STATE_SLEEPING +) + +type Can struct { + LinkAttrs + + BitRate uint32 + SamplePoint uint32 + TimeQuanta uint32 + PropagationSegment uint32 + PhaseSegment1 uint32 + PhaseSegment2 uint32 + SyncJumpWidth uint32 + BitRatePreScaler uint32 + + Name string + TimeSegment1Min uint32 + TimeSegment1Max uint32 + TimeSegment2Min uint32 + TimeSegment2Max uint32 + SyncJumpWidthMax uint32 + BitRatePreScalerMin uint32 + BitRatePreScalerMax uint32 + BitRatePreScalerInc uint32 + + ClockFrequency uint32 + + State uint32 + + Mask uint32 + Flags uint32 + + TxError uint16 + RxError uint16 + + RestartMs uint32 +} + +func (can *Can) Attrs() *LinkAttrs { + return &can.LinkAttrs +} + +func (can *Can) Type() string { + return "can" +} + +type IPoIB struct { + LinkAttrs + Pkey uint16 + Mode IPoIBMode + Umcast uint16 +} + +func (ipoib *IPoIB) Attrs() *LinkAttrs { + return &ipoib.LinkAttrs +} + +func (ipoib *IPoIB) Type() string { + return "ipoib" +} + +type BareUDP struct { + LinkAttrs + Port uint16 + EtherType uint16 + SrcPortMin uint16 + MultiProto bool +} + +func (bareudp *BareUDP) Attrs() *LinkAttrs { + return &bareudp.LinkAttrs +} + +func (bareudp *BareUDP) Type() string { + return "bareudp" +} + // iproute2 supported devices; // vlan | veth | vcan | dummy | ifb | macvlan | macvtap | // bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan | -// gre | gretap | ip6gre | ip6gretap | vti | nlmon | -// bond_slave | ipvlan +// gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon | +// bond_slave | ipvlan | xfrm | bareudp // LinkNotFoundError wraps the various not found errors when // getting/reading links. This is intended for better error diff --git a/vendor/github.com/vishvananda/netlink/link_linux.go b/vendor/github.com/vishvananda/netlink/link_linux.go index 0fe70295ef..276947a006 100644 --- a/vendor/github.com/vishvananda/netlink/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/link_linux.go @@ -4,32 +4,56 @@ import ( "bytes" "encoding/binary" "fmt" + "io/ioutil" "net" "os" + "strconv" + "strings" "syscall" "unsafe" "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" + "golang.org/x/sys/unix" ) const ( SizeofLinkStats32 = 0x5c - SizeofLinkStats64 = 0xd8 - IFLA_STATS64 = 0x17 // syscall pkg does not contain this one + SizeofLinkStats64 = 0xb8 ) const ( - TUNTAP_MODE_TUN TuntapMode = syscall.IFF_TUN - TUNTAP_MODE_TAP TuntapMode = syscall.IFF_TAP - TUNTAP_DEFAULTS TuntapFlag = syscall.IFF_TUN_EXCL | syscall.IFF_ONE_QUEUE - TUNTAP_VNET_HDR TuntapFlag = syscall.IFF_VNET_HDR - TUNTAP_TUN_EXCL TuntapFlag = syscall.IFF_TUN_EXCL - TUNTAP_NO_PI TuntapFlag = syscall.IFF_NO_PI - TUNTAP_ONE_QUEUE TuntapFlag = syscall.IFF_ONE_QUEUE + TUNTAP_MODE_TUN TuntapMode = unix.IFF_TUN + TUNTAP_MODE_TAP TuntapMode = unix.IFF_TAP + TUNTAP_DEFAULTS TuntapFlag = unix.IFF_TUN_EXCL | unix.IFF_ONE_QUEUE + TUNTAP_VNET_HDR TuntapFlag = unix.IFF_VNET_HDR + TUNTAP_TUN_EXCL TuntapFlag = unix.IFF_TUN_EXCL + TUNTAP_NO_PI TuntapFlag = unix.IFF_NO_PI + TUNTAP_ONE_QUEUE TuntapFlag = unix.IFF_ONE_QUEUE + TUNTAP_MULTI_QUEUE TuntapFlag = unix.IFF_MULTI_QUEUE + TUNTAP_MULTI_QUEUE_DEFAULTS TuntapFlag = TUNTAP_MULTI_QUEUE | TUNTAP_NO_PI ) -var lookupByDump = false +var StringToTuntapModeMap = map[string]TuntapMode{ + "tun": TUNTAP_MODE_TUN, + "tap": TUNTAP_MODE_TAP, +} + +func (ttm TuntapMode) String() string { + switch ttm { + case TUNTAP_MODE_TUN: + return "tun" + case TUNTAP_MODE_TAP: + return "tap" + } + return "unknown" +} + +const ( + VF_LINK_STATE_AUTO uint32 = 0 + VF_LINK_STATE_ENABLE uint32 = 1 + VF_LINK_STATE_DISABLE uint32 = 2 +) var macvlanModes = [...]uint32{ 0, @@ -61,15 +85,15 @@ func (h *Handle) ensureIndex(link *LinkAttrs) { func (h *Handle) LinkSetARPOff(link Link) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Change |= syscall.IFF_NOARP - msg.Flags |= syscall.IFF_NOARP + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change |= unix.IFF_NOARP + msg.Flags |= unix.IFF_NOARP msg.Index = int32(base.Index) req.AddData(msg) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -80,15 +104,15 @@ func LinkSetARPOff(link Link) error { func (h *Handle) LinkSetARPOn(link Link) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Change |= syscall.IFF_NOARP - msg.Flags &= ^uint32(syscall.IFF_NOARP) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change |= unix.IFF_NOARP + msg.Flags &= ^uint32(unix.IFF_NOARP) msg.Index = int32(base.Index) req.AddData(msg) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -99,15 +123,205 @@ func LinkSetARPOn(link Link) error { func (h *Handle) SetPromiscOn(link Link) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_PROMISC + msg.Flags = unix.IFF_PROMISC + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOn enables the reception of all hardware multicast packets for the link device. +// Equivalent to: `ip link set $link allmulticast on` +func LinkSetAllmulticastOn(link Link) error { + return pkgHandle.LinkSetAllmulticastOn(link) +} + +// LinkSetAllmulticastOn enables the reception of all hardware multicast packets for the link device. +// Equivalent to: `ip link set $link allmulticast on` +func (h *Handle) LinkSetAllmulticastOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_ALLMULTI + msg.Flags = unix.IFF_ALLMULTI + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOff disables the reception of all hardware multicast packets for the link device. +// Equivalent to: `ip link set $link allmulticast off` +func LinkSetAllmulticastOff(link Link) error { + return pkgHandle.LinkSetAllmulticastOff(link) +} + +// LinkSetAllmulticastOff disables the reception of all hardware multicast packets for the link device. +// Equivalent to: `ip link set $link allmulticast off` +func (h *Handle) LinkSetAllmulticastOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_ALLMULTI + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func LinkSetMulticastOn(link Link) error { + return pkgHandle.LinkSetMulticastOn(link) +} + +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func (h *Handle) LinkSetMulticastOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Flags = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func LinkSetMulticastOff(link Link) error { + return pkgHandle.LinkSetMulticastOff(link) +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func (h *Handle) LinkSetMulticastOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +func MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error { + return pkgHandle.MacvlanMACAddrAdd(link, addr) +} + +func (h *Handle) MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error { + return h.macvlanMACAddrChange(link, []net.HardwareAddr{addr}, nl.MACVLAN_MACADDR_ADD) +} + +func MacvlanMACAddrDel(link Link, addr net.HardwareAddr) error { + return pkgHandle.MacvlanMACAddrDel(link, addr) +} + +func (h *Handle) MacvlanMACAddrDel(link Link, addr net.HardwareAddr) error { + return h.macvlanMACAddrChange(link, []net.HardwareAddr{addr}, nl.MACVLAN_MACADDR_DEL) +} + +func MacvlanMACAddrFlush(link Link) error { + return pkgHandle.MacvlanMACAddrFlush(link) +} + +func (h *Handle) MacvlanMACAddrFlush(link Link) error { + return h.macvlanMACAddrChange(link, nil, nl.MACVLAN_MACADDR_FLUSH) +} + +func MacvlanMACAddrSet(link Link, addrs []net.HardwareAddr) error { + return pkgHandle.MacvlanMACAddrSet(link, addrs) +} + +func (h *Handle) MacvlanMACAddrSet(link Link, addrs []net.HardwareAddr) error { + return h.macvlanMACAddrChange(link, addrs, nl.MACVLAN_MACADDR_SET) +} + +func (h *Handle) macvlanMACAddrChange(link Link, addrs []net.HardwareAddr, mode uint32) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + inner := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + // IFLA_MACVLAN_MACADDR_MODE = mode + b := make([]byte, 4) + native.PutUint32(b, mode) + inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR_MODE, b) + + // populate message with MAC addrs, if necessary + switch mode { + case nl.MACVLAN_MACADDR_ADD, nl.MACVLAN_MACADDR_DEL: + if len(addrs) == 1 { + inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR, []byte(addrs[0])) + } + case nl.MACVLAN_MACADDR_SET: + mad := inner.AddRtAttr(nl.IFLA_MACVLAN_MACADDR_DATA, nil) + for _, addr := range addrs { + mad.AddRtAttr(nl.IFLA_MACVLAN_MACADDR, []byte(addr)) + } + } + + req.AddData(linkInfo) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetMacvlanMode sets the mode of a macvlan or macvtap link device. +// Note that passthrough mode cannot be set to and from and will fail. +// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode +func LinkSetMacvlanMode(link Link, mode MacvlanMode) error { + return pkgHandle.LinkSetMacvlanMode(link, mode) +} + +// LinkSetMacvlanMode sets the mode of the macvlan or macvtap link device. +// Note that passthrough mode cannot be set to and from and will fail. +// Equivalent to: `ip link set $link type (macvlan|macvtap) mode $mode +func (h *Handle) LinkSetMacvlanMode(link Link, mode MacvlanMode) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Change = syscall.IFF_PROMISC - msg.Flags = syscall.IFF_PROMISC + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[mode])) + + req.AddData(linkInfo) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -118,7 +332,17 @@ func BridgeSetMcastSnoop(link Link, on bool) error { func (h *Handle) BridgeSetMcastSnoop(link Link, on bool) error { bridge := link.(*Bridge) bridge.MulticastSnooping = &on - return h.linkModify(bridge, syscall.NLM_F_ACK) + return h.linkModify(bridge, unix.NLM_F_ACK) +} + +func BridgeSetVlanFiltering(link Link, on bool) error { + return pkgHandle.BridgeSetVlanFiltering(link, on) +} + +func (h *Handle) BridgeSetVlanFiltering(link Link, on bool) error { + bridge := link.(*Bridge) + bridge.VlanFiltering = &on + return h.linkModify(bridge, unix.NLM_F_ACK) } func SetPromiscOn(link Link) error { @@ -128,15 +352,14 @@ func SetPromiscOn(link Link) error { func (h *Handle) SetPromiscOff(link Link) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Change = syscall.IFF_PROMISC - msg.Flags = 0 & ^syscall.IFF_PROMISC + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_PROMISC msg.Index = int32(base.Index) req.AddData(msg) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -155,15 +378,15 @@ func LinkSetUp(link Link) error { func (h *Handle) LinkSetUp(link Link) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Change = syscall.IFF_UP - msg.Flags = syscall.IFF_UP + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_UP + msg.Flags = unix.IFF_UP msg.Index = int32(base.Index) req.AddData(msg) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -178,15 +401,14 @@ func LinkSetDown(link Link) error { func (h *Handle) LinkSetDown(link Link) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) - msg.Change = syscall.IFF_UP - msg.Flags = 0 & ^syscall.IFF_UP + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_UP msg.Index = int32(base.Index) req.AddData(msg) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -201,19 +423,19 @@ func LinkSetMTU(link Link, mtu int) error { func (h *Handle) LinkSetMTU(link Link, mtu int) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) b := make([]byte, 4) native.PutUint32(b, uint32(mtu)) - data := nl.NewRtAttr(syscall.IFLA_MTU, b) + data := nl.NewRtAttr(unix.IFLA_MTU, b) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -228,16 +450,16 @@ func LinkSetName(link Link, name string) error { func (h *Handle) LinkSetName(link Link, name string) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - data := nl.NewRtAttr(syscall.IFLA_IFNAME, []byte(name)) + data := nl.NewRtAttr(unix.IFLA_IFNAME, []byte(name)) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -252,16 +474,16 @@ func LinkSetAlias(link Link, name string) error { func (h *Handle) LinkSetAlias(link Link, name string) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - data := nl.NewRtAttr(syscall.IFLA_IFALIAS, []byte(name)) + data := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(name)) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -276,16 +498,16 @@ func LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { func (h *Handle) LinkSetHardwareAddr(link Link, hwaddr net.HardwareAddr) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - data := nl.NewRtAttr(syscall.IFLA_ADDRESS, []byte(hwaddr)) + data := nl.NewRtAttr(unix.IFLA_ADDRESS, []byte(hwaddr)) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -300,22 +522,22 @@ func LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error { func (h *Handle) LinkSetVfHardwareAddr(link Link, vf int, hwaddr net.HardwareAddr) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) - info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) vfmsg := nl.VfMac{ Vf: uint32(vf), } copy(vfmsg.Mac[:], []byte(hwaddr)) - nl.NewRtAttrChild(info, nl.IFLA_VF_MAC, vfmsg.Serialize()) + info.AddRtAttr(nl.IFLA_VF_MAC, vfmsg.Serialize()) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -330,22 +552,53 @@ func LinkSetVfVlan(link Link, vf, vlan int) error { func (h *Handle) LinkSetVfVlan(link Link, vf, vlan int) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfVlan{ + Vf: uint32(vf), + Vlan: uint32(vlan), + } + info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfVlanQos sets the vlan and qos priority of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos` +func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { + return pkgHandle.LinkSetVfVlanQos(link, vf, vlan, qos) +} + +// LinkSetVfVlanQos sets the vlan and qos priority of a vf for the link. +// Equivalent to: `ip link set $link vf $vf vlan $vlan qos $qos` +func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) - info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) vfmsg := nl.VfVlan{ Vf: uint32(vf), Vlan: uint32(vlan), + Qos: uint32(qos), } - nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize()) + info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize()) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -360,22 +613,83 @@ func LinkSetVfTxRate(link Link, vf, rate int) error { func (h *Handle) LinkSetVfTxRate(link Link, vf, rate int) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) - info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) vfmsg := nl.VfTxRate{ Vf: uint32(vf), Rate: uint32(rate), } - nl.NewRtAttrChild(info, nl.IFLA_VF_TX_RATE, vfmsg.Serialize()) + info.AddRtAttr(nl.IFLA_VF_TX_RATE, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfRate sets the min and max tx rate of a vf for the link. +// Equivalent to: `ip link set $link vf $vf min_tx_rate $min_rate max_tx_rate $max_rate` +func LinkSetVfRate(link Link, vf, minRate, maxRate int) error { + return pkgHandle.LinkSetVfRate(link, vf, minRate, maxRate) +} + +// LinkSetVfRate sets the min and max tx rate of a vf for the link. +// Equivalent to: `ip link set $link vf $vf min_tx_rate $min_rate max_tx_rate $max_rate` +func (h *Handle) LinkSetVfRate(link Link, vf, minRate, maxRate int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfRate{ + Vf: uint32(vf), + MinTxRate: uint32(minRate), + MaxTxRate: uint32(maxRate), + } + info.AddRtAttr(nl.IFLA_VF_RATE, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfState enables/disables virtual link state on a vf. +// Equivalent to: `ip link set $link vf $vf state $state` +func LinkSetVfState(link Link, vf int, state uint32) error { + return pkgHandle.LinkSetVfState(link, vf, state) +} + +// LinkSetVfState enables/disables virtual link state on a vf. +// Equivalent to: `ip link set $link vf $vf state $state` +func (h *Handle) LinkSetVfState(link Link, vf int, state uint32) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfLinkState{ + Vf: uint32(vf), + LinkState: state, + } + info.AddRtAttr(nl.IFLA_VF_LINK_STATE, vfmsg.Serialize()) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -385,20 +699,20 @@ func LinkSetVfSpoofchk(link Link, vf int, check bool) error { return pkgHandle.LinkSetVfSpoofchk(link, vf, check) } -// LinkSetVfSpookfchk enables/disables spoof check on a vf for the link. +// LinkSetVfSpoofchk enables/disables spoof check on a vf for the link. // Equivalent to: `ip link set $link vf $vf spoofchk $check` func (h *Handle) LinkSetVfSpoofchk(link Link, vf int, check bool) error { var setting uint32 base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) - info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) if check { setting = 1 } @@ -406,10 +720,10 @@ func (h *Handle) LinkSetVfSpoofchk(link Link, vf int, check bool) error { Vf: uint32(vf), Setting: setting, } - nl.NewRtAttrChild(info, nl.IFLA_VF_SPOOFCHK, vfmsg.Serialize()) + info.AddRtAttr(nl.IFLA_VF_SPOOFCHK, vfmsg.Serialize()) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -425,14 +739,14 @@ func (h *Handle) LinkSetVfTrust(link Link, vf int, state bool) error { var setting uint32 base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - data := nl.NewRtAttr(nl.IFLA_VFINFO_LIST, nil) - info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) if state { setting = 1 } @@ -440,22 +754,66 @@ func (h *Handle) LinkSetVfTrust(link Link, vf int, state bool) error { Vf: uint32(vf), Setting: setting, } - nl.NewRtAttrChild(info, nl.IFLA_VF_TRUST, vfmsg.Serialize()) + info.AddRtAttr(nl.IFLA_VF_TRUST, vfmsg.Serialize()) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetVfNodeGUID sets the node GUID of a vf for the link. +// Equivalent to: `ip link set dev $link vf $vf node_guid $nodeguid` +func LinkSetVfNodeGUID(link Link, vf int, nodeguid net.HardwareAddr) error { + return pkgHandle.LinkSetVfGUID(link, vf, nodeguid, nl.IFLA_VF_IB_NODE_GUID) +} + +// LinkSetVfPortGUID sets the port GUID of a vf for the link. +// Equivalent to: `ip link set dev $link vf $vf port_guid $portguid` +func LinkSetVfPortGUID(link Link, vf int, portguid net.HardwareAddr) error { + return pkgHandle.LinkSetVfGUID(link, vf, portguid, nl.IFLA_VF_IB_PORT_GUID) +} + +// LinkSetVfGUID sets the node or port GUID of a vf for the link. +func (h *Handle) LinkSetVfGUID(link Link, vf int, vfGuid net.HardwareAddr, guidType int) error { + var err error + var guid uint64 + + buf := bytes.NewBuffer(vfGuid) + err = binary.Read(buf, binary.BigEndian, &guid) + if err != nil { + return err + } + + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) + vfmsg := nl.VfGUID{ + Vf: uint32(vf), + GUID: guid, + } + info.AddRtAttr(guidType, vfmsg.Serialize()) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err = req.Execute(unix.NETLINK_ROUTE, 0) return err } // LinkSetMaster sets the master of the link device. // Equivalent to: `ip link set $link master $master` -func LinkSetMaster(link Link, master *Bridge) error { +func LinkSetMaster(link Link, master Link) error { return pkgHandle.LinkSetMaster(link, master) } // LinkSetMaster sets the master of the link device. // Equivalent to: `ip link set $link master $master` -func (h *Handle) LinkSetMaster(link Link, master *Bridge) error { +func (h *Handle) LinkSetMaster(link Link, master Link) error { index := 0 if master != nil { masterBase := master.Attrs() @@ -491,19 +849,19 @@ func LinkSetMasterByIndex(link Link, masterIndex int) error { func (h *Handle) LinkSetMasterByIndex(link Link, masterIndex int) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) b := make([]byte, 4) native.PutUint32(b, uint32(masterIndex)) - data := nl.NewRtAttr(syscall.IFLA_MASTER, b) + data := nl.NewRtAttr(unix.IFLA_MASTER, b) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -520,19 +878,19 @@ func LinkSetNsPid(link Link, nspid int) error { func (h *Handle) LinkSetNsPid(link Link, nspid int) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) b := make([]byte, 4) native.PutUint32(b, uint32(nspid)) - data := nl.NewRtAttr(syscall.IFLA_NET_NS_PID, b) + data := nl.NewRtAttr(unix.IFLA_NET_NS_PID, b) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -549,36 +907,42 @@ func LinkSetNsFd(link Link, fd int) error { func (h *Handle) LinkSetNsFd(link Link, fd int) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) b := make([]byte, 4) native.PutUint32(b, uint32(fd)) - data := nl.NewRtAttr(nl.IFLA_NET_NS_FD, b) + data := nl.NewRtAttr(unix.IFLA_NET_NS_FD, b) req.AddData(data) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } // LinkSetXdpFd adds a bpf function to the driver. The fd must be a bpf // program loaded with bpf(type=BPF_PROG_TYPE_XDP) func LinkSetXdpFd(link Link, fd int) error { + return LinkSetXdpFdWithFlags(link, fd, 0) +} + +// LinkSetXdpFdWithFlags adds a bpf function to the driver with the given +// options. The fd must be a bpf program loaded with bpf(type=BPF_PROG_TYPE_XDP) +func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { base := link.Attrs() ensureIndex(base) - req := nl.NewNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := nl.NewNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - addXdpAttrs(&LinkXdp{Fd: fd}, req) + addXdpAttrs(&LinkXdp{Fd: fd, Flags: uint32(flags)}, req) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -595,67 +959,69 @@ type vxlanPortRange struct { } func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) if vxlan.FlowBased { vxlan.VxlanId = 0 } - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_ID, nl.Uint32Attr(uint32(vxlan.VxlanId))) + data.AddRtAttr(nl.IFLA_VXLAN_ID, nl.Uint32Attr(uint32(vxlan.VxlanId))) if vxlan.VtepDevIndex != 0 { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LINK, nl.Uint32Attr(uint32(vxlan.VtepDevIndex))) + data.AddRtAttr(nl.IFLA_VXLAN_LINK, nl.Uint32Attr(uint32(vxlan.VtepDevIndex))) } if vxlan.SrcAddr != nil { ip := vxlan.SrcAddr.To4() if ip != nil { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LOCAL, []byte(ip)) + data.AddRtAttr(nl.IFLA_VXLAN_LOCAL, []byte(ip)) } else { ip = vxlan.SrcAddr.To16() if ip != nil { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LOCAL6, []byte(ip)) + data.AddRtAttr(nl.IFLA_VXLAN_LOCAL6, []byte(ip)) } } } if vxlan.Group != nil { group := vxlan.Group.To4() if group != nil { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GROUP, []byte(group)) + data.AddRtAttr(nl.IFLA_VXLAN_GROUP, []byte(group)) } else { group = vxlan.Group.To16() if group != nil { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GROUP6, []byte(group)) + data.AddRtAttr(nl.IFLA_VXLAN_GROUP6, []byte(group)) } } } - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_TTL, nl.Uint8Attr(uint8(vxlan.TTL))) - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_TOS, nl.Uint8Attr(uint8(vxlan.TOS))) - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LEARNING, boolAttr(vxlan.Learning)) - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PROXY, boolAttr(vxlan.Proxy)) - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_RSC, boolAttr(vxlan.RSC)) - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss)) - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss)) + data.AddRtAttr(nl.IFLA_VXLAN_TTL, nl.Uint8Attr(uint8(vxlan.TTL))) + data.AddRtAttr(nl.IFLA_VXLAN_TOS, nl.Uint8Attr(uint8(vxlan.TOS))) + data.AddRtAttr(nl.IFLA_VXLAN_LEARNING, boolAttr(vxlan.Learning)) + data.AddRtAttr(nl.IFLA_VXLAN_PROXY, boolAttr(vxlan.Proxy)) + data.AddRtAttr(nl.IFLA_VXLAN_RSC, boolAttr(vxlan.RSC)) + data.AddRtAttr(nl.IFLA_VXLAN_L2MISS, boolAttr(vxlan.L2miss)) + data.AddRtAttr(nl.IFLA_VXLAN_L3MISS, boolAttr(vxlan.L3miss)) + data.AddRtAttr(nl.IFLA_VXLAN_UDP_ZERO_CSUM6_TX, boolAttr(vxlan.UDP6ZeroCSumTx)) + data.AddRtAttr(nl.IFLA_VXLAN_UDP_ZERO_CSUM6_RX, boolAttr(vxlan.UDP6ZeroCSumRx)) if vxlan.UDPCSum { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum)) + data.AddRtAttr(nl.IFLA_VXLAN_UDP_CSUM, boolAttr(vxlan.UDPCSum)) } if vxlan.GBP { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_GBP, []byte{}) + data.AddRtAttr(nl.IFLA_VXLAN_GBP, []byte{}) } if vxlan.FlowBased { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_FLOWBASED, boolAttr(vxlan.FlowBased)) + data.AddRtAttr(nl.IFLA_VXLAN_FLOWBASED, boolAttr(vxlan.FlowBased)) } if vxlan.NoAge { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0)) + data.AddRtAttr(nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(0)) } else if vxlan.Age > 0 { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(uint32(vxlan.Age))) + data.AddRtAttr(nl.IFLA_VXLAN_AGEING, nl.Uint32Attr(uint32(vxlan.Age))) } if vxlan.Limit > 0 { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_LIMIT, nl.Uint32Attr(uint32(vxlan.Limit))) + data.AddRtAttr(nl.IFLA_VXLAN_LIMIT, nl.Uint32Attr(uint32(vxlan.Limit))) } if vxlan.Port > 0 { - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PORT, htons(uint16(vxlan.Port))) + data.AddRtAttr(nl.IFLA_VXLAN_PORT, htons(uint16(vxlan.Port))) } if vxlan.PortLow > 0 || vxlan.PortHigh > 0 { pr := vxlanPortRange{uint16(vxlan.PortLow), uint16(vxlan.PortHigh)} @@ -663,100 +1029,106 @@ func addVxlanAttrs(vxlan *Vxlan, linkInfo *nl.RtAttr) { buf := new(bytes.Buffer) binary.Write(buf, binary.BigEndian, &pr) - nl.NewRtAttrChild(data, nl.IFLA_VXLAN_PORT_RANGE, buf.Bytes()) + data.AddRtAttr(nl.IFLA_VXLAN_PORT_RANGE, buf.Bytes()) } } func addBondAttrs(bond *Bond, linkInfo *nl.RtAttr) { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) if bond.Mode >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_MODE, nl.Uint8Attr(uint8(bond.Mode))) + data.AddRtAttr(nl.IFLA_BOND_MODE, nl.Uint8Attr(uint8(bond.Mode))) } if bond.ActiveSlave >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_ACTIVE_SLAVE, nl.Uint32Attr(uint32(bond.ActiveSlave))) + data.AddRtAttr(nl.IFLA_BOND_ACTIVE_SLAVE, nl.Uint32Attr(uint32(bond.ActiveSlave))) } if bond.Miimon >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_MIIMON, nl.Uint32Attr(uint32(bond.Miimon))) + data.AddRtAttr(nl.IFLA_BOND_MIIMON, nl.Uint32Attr(uint32(bond.Miimon))) } if bond.UpDelay >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_UPDELAY, nl.Uint32Attr(uint32(bond.UpDelay))) + data.AddRtAttr(nl.IFLA_BOND_UPDELAY, nl.Uint32Attr(uint32(bond.UpDelay))) } if bond.DownDelay >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_DOWNDELAY, nl.Uint32Attr(uint32(bond.DownDelay))) + data.AddRtAttr(nl.IFLA_BOND_DOWNDELAY, nl.Uint32Attr(uint32(bond.DownDelay))) } if bond.UseCarrier >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_USE_CARRIER, nl.Uint8Attr(uint8(bond.UseCarrier))) + data.AddRtAttr(nl.IFLA_BOND_USE_CARRIER, nl.Uint8Attr(uint8(bond.UseCarrier))) } if bond.ArpInterval >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_INTERVAL, nl.Uint32Attr(uint32(bond.ArpInterval))) + data.AddRtAttr(nl.IFLA_BOND_ARP_INTERVAL, nl.Uint32Attr(uint32(bond.ArpInterval))) } if bond.ArpIpTargets != nil { - msg := nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_IP_TARGET, nil) + msg := data.AddRtAttr(nl.IFLA_BOND_ARP_IP_TARGET, nil) for i := range bond.ArpIpTargets { ip := bond.ArpIpTargets[i].To4() if ip != nil { - nl.NewRtAttrChild(msg, i, []byte(ip)) + msg.AddRtAttr(i, []byte(ip)) continue } ip = bond.ArpIpTargets[i].To16() if ip != nil { - nl.NewRtAttrChild(msg, i, []byte(ip)) + msg.AddRtAttr(i, []byte(ip)) } } } if bond.ArpValidate >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_VALIDATE, nl.Uint32Attr(uint32(bond.ArpValidate))) + data.AddRtAttr(nl.IFLA_BOND_ARP_VALIDATE, nl.Uint32Attr(uint32(bond.ArpValidate))) } if bond.ArpAllTargets >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_ARP_ALL_TARGETS, nl.Uint32Attr(uint32(bond.ArpAllTargets))) + data.AddRtAttr(nl.IFLA_BOND_ARP_ALL_TARGETS, nl.Uint32Attr(uint32(bond.ArpAllTargets))) } if bond.Primary >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_PRIMARY, nl.Uint32Attr(uint32(bond.Primary))) + data.AddRtAttr(nl.IFLA_BOND_PRIMARY, nl.Uint32Attr(uint32(bond.Primary))) } if bond.PrimaryReselect >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_PRIMARY_RESELECT, nl.Uint8Attr(uint8(bond.PrimaryReselect))) + data.AddRtAttr(nl.IFLA_BOND_PRIMARY_RESELECT, nl.Uint8Attr(uint8(bond.PrimaryReselect))) } if bond.FailOverMac >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_FAIL_OVER_MAC, nl.Uint8Attr(uint8(bond.FailOverMac))) + data.AddRtAttr(nl.IFLA_BOND_FAIL_OVER_MAC, nl.Uint8Attr(uint8(bond.FailOverMac))) } if bond.XmitHashPolicy >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_XMIT_HASH_POLICY, nl.Uint8Attr(uint8(bond.XmitHashPolicy))) + data.AddRtAttr(nl.IFLA_BOND_XMIT_HASH_POLICY, nl.Uint8Attr(uint8(bond.XmitHashPolicy))) } if bond.ResendIgmp >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_RESEND_IGMP, nl.Uint32Attr(uint32(bond.ResendIgmp))) + data.AddRtAttr(nl.IFLA_BOND_RESEND_IGMP, nl.Uint32Attr(uint32(bond.ResendIgmp))) } if bond.NumPeerNotif >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_NUM_PEER_NOTIF, nl.Uint8Attr(uint8(bond.NumPeerNotif))) + data.AddRtAttr(nl.IFLA_BOND_NUM_PEER_NOTIF, nl.Uint8Attr(uint8(bond.NumPeerNotif))) } if bond.AllSlavesActive >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_ALL_SLAVES_ACTIVE, nl.Uint8Attr(uint8(bond.AllSlavesActive))) + data.AddRtAttr(nl.IFLA_BOND_ALL_SLAVES_ACTIVE, nl.Uint8Attr(uint8(bond.AllSlavesActive))) } if bond.MinLinks >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_MIN_LINKS, nl.Uint32Attr(uint32(bond.MinLinks))) + data.AddRtAttr(nl.IFLA_BOND_MIN_LINKS, nl.Uint32Attr(uint32(bond.MinLinks))) } if bond.LpInterval >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval))) + data.AddRtAttr(nl.IFLA_BOND_LP_INTERVAL, nl.Uint32Attr(uint32(bond.LpInterval))) } - if bond.PackersPerSlave >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PackersPerSlave))) + if bond.PacketsPerSlave >= 0 { + data.AddRtAttr(nl.IFLA_BOND_PACKETS_PER_SLAVE, nl.Uint32Attr(uint32(bond.PacketsPerSlave))) } if bond.LacpRate >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate))) + data.AddRtAttr(nl.IFLA_BOND_AD_LACP_RATE, nl.Uint8Attr(uint8(bond.LacpRate))) } if bond.AdSelect >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_SELECT, nl.Uint8Attr(uint8(bond.AdSelect))) + data.AddRtAttr(nl.IFLA_BOND_AD_SELECT, nl.Uint8Attr(uint8(bond.AdSelect))) } if bond.AdActorSysPrio >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_ACTOR_SYS_PRIO, nl.Uint16Attr(uint16(bond.AdActorSysPrio))) + data.AddRtAttr(nl.IFLA_BOND_AD_ACTOR_SYS_PRIO, nl.Uint16Attr(uint16(bond.AdActorSysPrio))) } if bond.AdUserPortKey >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_USER_PORT_KEY, nl.Uint16Attr(uint16(bond.AdUserPortKey))) + data.AddRtAttr(nl.IFLA_BOND_AD_USER_PORT_KEY, nl.Uint16Attr(uint16(bond.AdUserPortKey))) } if bond.AdActorSystem != nil { - nl.NewRtAttrChild(data, nl.IFLA_BOND_AD_ACTOR_SYSTEM, []byte(bond.AdActorSystem)) + data.AddRtAttr(nl.IFLA_BOND_AD_ACTOR_SYSTEM, []byte(bond.AdActorSystem)) } if bond.TlbDynamicLb >= 0 { - nl.NewRtAttrChild(data, nl.IFLA_BOND_TLB_DYNAMIC_LB, nl.Uint8Attr(uint8(bond.TlbDynamicLb))) + data.AddRtAttr(nl.IFLA_BOND_TLB_DYNAMIC_LB, nl.Uint8Attr(uint8(bond.TlbDynamicLb))) + } +} + +func cleanupFds(fds []*os.File) { + for _, f := range fds { + f.Close() } } @@ -768,121 +1140,281 @@ func LinkAdd(link Link) error { } // LinkAdd adds a new link device. The type and features of the device -// are taken fromt the parameters in the link object. +// are taken from the parameters in the link object. // Equivalent to: `ip link add $link` func (h *Handle) LinkAdd(link Link) error { - return h.linkModify(link, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + return h.linkModify(link, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) +} + +func LinkModify(link Link) error { + return pkgHandle.LinkModify(link) +} + +func (h *Handle) LinkModify(link Link) error { + return h.linkModify(link, unix.NLM_F_REQUEST|unix.NLM_F_ACK) } func (h *Handle) linkModify(link Link, flags int) error { // TODO: support extra data for macvlan base := link.Attrs() - if base.Name == "" { - return fmt.Errorf("LinkAttrs.Name cannot be empty!") + // if tuntap, then the name can be empty, OS will provide a name + tuntap, isTuntap := link.(*Tuntap) + + if base.Name == "" && !isTuntap { + return fmt.Errorf("LinkAttrs.Name cannot be empty") } - if tuntap, ok := link.(*Tuntap); ok { - // TODO: support user - // TODO: support group - // TODO: multi_queue - // TODO: support non- persistent - if tuntap.Mode < syscall.IFF_TUN || tuntap.Mode > syscall.IFF_TAP { - return fmt.Errorf("Tuntap.Mode %v unknown!", tuntap.Mode) - } - file, err := os.OpenFile("/dev/net/tun", os.O_RDWR, 0) - if err != nil { - return err + if isTuntap { + if tuntap.Mode < unix.IFF_TUN || tuntap.Mode > unix.IFF_TAP { + return fmt.Errorf("Tuntap.Mode %v unknown", tuntap.Mode) } - defer file.Close() + + queues := tuntap.Queues + + var fds []*os.File var req ifReq - if tuntap.Flags == 0 { - req.Flags = uint16(TUNTAP_DEFAULTS) + copy(req.Name[:15], base.Name) + + req.Flags = uint16(tuntap.Flags) + + if queues == 0 { //Legacy compatibility + queues = 1 + if tuntap.Flags == 0 { + req.Flags = uint16(TUNTAP_DEFAULTS) + } } else { - req.Flags = uint16(tuntap.Flags) + // For best peformance set Flags to TUNTAP_MULTI_QUEUE_DEFAULTS | TUNTAP_VNET_HDR + // when a) KVM has support for this ABI and + // b) the value of the flag is queryable using the TUNGETIFF ioctl + if tuntap.Flags == 0 { + req.Flags = uint16(TUNTAP_MULTI_QUEUE_DEFAULTS) + } } + req.Flags |= uint16(tuntap.Mode) - copy(req.Name[:15], base.Name) - _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETIFF), uintptr(unsafe.Pointer(&req))) - if errno != 0 { - return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed, errno %v", errno) + const TUN = "/dev/net/tun" + for i := 0; i < queues; i++ { + localReq := req + fd, err := unix.Open(TUN, os.O_RDWR|syscall.O_CLOEXEC, 0) + if err != nil { + cleanupFds(fds) + return err + } + + _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&localReq))) + if errno != 0 { + // close the new fd + unix.Close(fd) + // and the already opened ones + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETIFF failed [%d], errno %v", i, errno) + } + + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETOWNER, uintptr(tuntap.Owner)) + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETOWNER failed [%d], errno %v", i, errno) + } + + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.TUNSETGROUP, uintptr(tuntap.Group)) + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETGROUP failed [%d], errno %v", i, errno) + } + + // Set the tun device to non-blocking before use. The below comment + // taken from: + // + // https://github.com/mistsys/tuntap/commit/161418c25003bbee77d085a34af64d189df62bea + // + // Note there is a complication because in go, if a device node is + // opened, go sets it to use nonblocking I/O. However a /dev/net/tun + // doesn't work with epoll until after the TUNSETIFF ioctl has been + // done. So we open the unix fd directly, do the ioctl, then put the + // fd in nonblocking mode, an then finally wrap it in a os.File, + // which will see the nonblocking mode and add the fd to the + // pollable set, so later on when we Read() from it blocked the + // calling thread in the kernel. + // + // See + // https://github.com/golang/go/issues/30426 + // which got exposed in go 1.13 by the fix to + // https://github.com/golang/go/issues/30624 + err = unix.SetNonblock(fd, true) + if err != nil { + cleanupFds(fds) + return fmt.Errorf("Tuntap set to non-blocking failed [%d], err %v", i, err) + } + + // create the file from the file descriptor and store it + file := os.NewFile(uintptr(fd), TUN) + fds = append(fds, file) + + // 1) we only care for the name of the first tap in the multi queue set + // 2) if the original name was empty, the localReq has now the actual name + // + // In addition: + // This ensures that the link name is always identical to what the kernel returns. + // Not only in case of an empty name, but also when using name templates. + // e.g. when the provided name is "tap%d", the kernel replaces %d with the next available number. + if i == 0 { + link.Attrs().Name = strings.Trim(string(localReq.Name[:]), "\x00") + } + + } + + control := func(file *os.File, f func(fd uintptr)) error { + name := file.Name() + conn, err := file.SyscallConn() + if err != nil { + return fmt.Errorf("SyscallConn() failed on %s: %v", name, err) + } + if err := conn.Control(f); err != nil { + return fmt.Errorf("Failed to get file descriptor for %s: %v", name, err) + } + return nil } - _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), uintptr(syscall.TUNSETPERSIST), 1) - if errno != 0 { - return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno) + + // only persist interface if NonPersist is NOT set + if !tuntap.NonPersist { + var errno syscall.Errno + if err := control(fds[0], func(fd uintptr) { + _, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 1) + }); err != nil { + return err + } + if errno != 0 { + cleanupFds(fds) + return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno) + } } + h.ensureIndex(base) // can't set master during create, so set it afterwards if base.MasterIndex != 0 { // TODO: verify MasterIndex is actually a bridge? - return h.LinkSetMasterByIndex(link, base.MasterIndex) + err := h.LinkSetMasterByIndex(link, base.MasterIndex) + if err != nil { + // un-persist (e.g. allow the interface to be removed) the tuntap + // should not hurt if not set prior, condition might be not needed + if !tuntap.NonPersist { + // ignore error + _ = control(fds[0], func(fd uintptr) { + _, _, _ = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 0) + }) + } + cleanupFds(fds) + return err + } + } + + if tuntap.Queues == 0 { + cleanupFds(fds) + } else { + tuntap.Fds = fds } + return nil } - req := h.newNetlinkRequest(syscall.RTM_NEWLINK, flags) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, flags) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) // TODO: make it shorter if base.Flags&net.FlagUp != 0 { - msg.Change = syscall.IFF_UP - msg.Flags = syscall.IFF_UP + msg.Change = unix.IFF_UP + msg.Flags = unix.IFF_UP } if base.Flags&net.FlagBroadcast != 0 { - msg.Change |= syscall.IFF_BROADCAST - msg.Flags |= syscall.IFF_BROADCAST + msg.Change |= unix.IFF_BROADCAST + msg.Flags |= unix.IFF_BROADCAST } if base.Flags&net.FlagLoopback != 0 { - msg.Change |= syscall.IFF_LOOPBACK - msg.Flags |= syscall.IFF_LOOPBACK + msg.Change |= unix.IFF_LOOPBACK + msg.Flags |= unix.IFF_LOOPBACK } if base.Flags&net.FlagPointToPoint != 0 { - msg.Change |= syscall.IFF_POINTOPOINT - msg.Flags |= syscall.IFF_POINTOPOINT + msg.Change |= unix.IFF_POINTOPOINT + msg.Flags |= unix.IFF_POINTOPOINT } if base.Flags&net.FlagMulticast != 0 { - msg.Change |= syscall.IFF_MULTICAST - msg.Flags |= syscall.IFF_MULTICAST + msg.Change |= unix.IFF_MULTICAST + msg.Flags |= unix.IFF_MULTICAST + } + if base.Index != 0 { + msg.Index = int32(base.Index) } + req.AddData(msg) if base.ParentIndex != 0 { b := make([]byte, 4) native.PutUint32(b, uint32(base.ParentIndex)) - data := nl.NewRtAttr(syscall.IFLA_LINK, b) + data := nl.NewRtAttr(unix.IFLA_LINK, b) req.AddData(data) - } else if link.Type() == "ipvlan" { - return fmt.Errorf("Can't create ipvlan link without ParentIndex") + } else if link.Type() == "ipvlan" || link.Type() == "ipoib" { + return fmt.Errorf("Can't create %s link without ParentIndex", link.Type()) } - nameData := nl.NewRtAttr(syscall.IFLA_IFNAME, nl.ZeroTerminated(base.Name)) + nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(base.Name)) req.AddData(nameData) + if base.Alias != "" { + alias := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(base.Alias)) + req.AddData(alias) + } + if base.MTU > 0 { - mtu := nl.NewRtAttr(syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) + mtu := nl.NewRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) req.AddData(mtu) } if base.TxQLen >= 0 { - qlen := nl.NewRtAttr(syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + qlen := nl.NewRtAttr(unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) req.AddData(qlen) } if base.HardwareAddr != nil { - hwaddr := nl.NewRtAttr(syscall.IFLA_ADDRESS, []byte(base.HardwareAddr)) + hwaddr := nl.NewRtAttr(unix.IFLA_ADDRESS, []byte(base.HardwareAddr)) req.AddData(hwaddr) } + if base.NumTxQueues > 0 { + txqueues := nl.NewRtAttr(unix.IFLA_NUM_TX_QUEUES, nl.Uint32Attr(uint32(base.NumTxQueues))) + req.AddData(txqueues) + } + + if base.NumRxQueues > 0 { + rxqueues := nl.NewRtAttr(unix.IFLA_NUM_RX_QUEUES, nl.Uint32Attr(uint32(base.NumRxQueues))) + req.AddData(rxqueues) + } + + if base.GSOMaxSegs > 0 { + gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_MAX_SEGS, nl.Uint32Attr(base.GSOMaxSegs)) + req.AddData(gsoAttr) + } + + if base.GSOMaxSize > 0 { + gsoAttr := nl.NewRtAttr(unix.IFLA_GSO_MAX_SIZE, nl.Uint32Attr(base.GSOMaxSize)) + req.AddData(gsoAttr) + } + + if base.Group > 0 { + groupAttr := nl.NewRtAttr(unix.IFLA_GROUP, nl.Uint32Attr(base.Group)) + req.AddData(groupAttr) + } + if base.Namespace != nil { var attr *nl.RtAttr - switch base.Namespace.(type) { + switch ns := base.Namespace.(type) { case NsPid: - val := nl.Uint32Attr(uint32(base.Namespace.(NsPid))) - attr = nl.NewRtAttr(syscall.IFLA_NET_NS_PID, val) + val := nl.Uint32Attr(uint32(ns)) + attr = nl.NewRtAttr(unix.IFLA_NET_NS_PID, val) case NsFd: - val := nl.Uint32Attr(uint32(base.Namespace.(NsFd))) - attr = nl.NewRtAttr(nl.IFLA_NET_NS_FD, val) + val := nl.Uint32Attr(uint32(ns)) + attr = nl.NewRtAttr(unix.IFLA_NET_NS_FD, val) } req.AddData(attr) @@ -892,60 +1424,102 @@ func (h *Handle) linkModify(link Link, flags int) error { addXdpAttrs(base.Xdp, req) } - linkInfo := nl.NewRtAttr(syscall.IFLA_LINKINFO, nil) - nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + linkInfo.AddRtAttr(nl.IFLA_INFO_KIND, nl.NonZeroTerminated(link.Type())) - if vlan, ok := link.(*Vlan); ok { + switch link := link.(type) { + case *Vlan: b := make([]byte, 2) - native.PutUint16(b, uint16(vlan.VlanId)) - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - nl.NewRtAttrChild(data, nl.IFLA_VLAN_ID, b) - } else if veth, ok := link.(*Veth); ok { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - peer := nl.NewRtAttrChild(data, nl.VETH_INFO_PEER, nil) - nl.NewIfInfomsgChild(peer, syscall.AF_UNSPEC) - nl.NewRtAttrChild(peer, syscall.IFLA_IFNAME, nl.ZeroTerminated(veth.PeerName)) + native.PutUint16(b, uint16(link.VlanId)) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_VLAN_ID, b) + + if link.VlanProtocol != VLAN_PROTOCOL_UNKNOWN { + data.AddRtAttr(nl.IFLA_VLAN_PROTOCOL, htons(uint16(link.VlanProtocol))) + } + case *Veth: + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + peer := data.AddRtAttr(nl.VETH_INFO_PEER, nil) + nl.NewIfInfomsgChild(peer, unix.AF_UNSPEC) + peer.AddRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(link.PeerName)) if base.TxQLen >= 0 { - nl.NewRtAttrChild(peer, syscall.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + peer.AddRtAttr(unix.IFLA_TXQLEN, nl.Uint32Attr(uint32(base.TxQLen))) + } + if base.NumTxQueues > 0 { + peer.AddRtAttr(unix.IFLA_NUM_TX_QUEUES, nl.Uint32Attr(uint32(base.NumTxQueues))) + } + if base.NumRxQueues > 0 { + peer.AddRtAttr(unix.IFLA_NUM_RX_QUEUES, nl.Uint32Attr(uint32(base.NumRxQueues))) } if base.MTU > 0 { - nl.NewRtAttrChild(peer, syscall.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) - } - - } else if vxlan, ok := link.(*Vxlan); ok { - addVxlanAttrs(vxlan, linkInfo) - } else if bond, ok := link.(*Bond); ok { - addBondAttrs(bond, linkInfo) - } else if ipv, ok := link.(*IPVlan); ok { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - nl.NewRtAttrChild(data, nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(ipv.Mode))) - } else if macv, ok := link.(*Macvlan); ok { - if macv.Mode != MACVLAN_MODE_DEFAULT { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode])) - } - } else if macv, ok := link.(*Macvtap); ok { - if macv.Mode != MACVLAN_MODE_DEFAULT { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - nl.NewRtAttrChild(data, nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[macv.Mode])) - } - } else if gretap, ok := link.(*Gretap); ok { - addGretapAttrs(gretap, linkInfo) - } else if iptun, ok := link.(*Iptun); ok { - addIptunAttrs(iptun, linkInfo) - } else if vti, ok := link.(*Vti); ok { - addVtiAttrs(vti, linkInfo) - } else if vrf, ok := link.(*Vrf); ok { - addVrfAttrs(vrf, linkInfo) - } else if bridge, ok := link.(*Bridge); ok { - addBridgeAttrs(bridge, linkInfo) - } else if gtp, ok := link.(*GTP); ok { - addGTPAttrs(gtp, linkInfo) + peer.AddRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(base.MTU))) + } + if link.PeerHardwareAddr != nil { + peer.AddRtAttr(unix.IFLA_ADDRESS, []byte(link.PeerHardwareAddr)) + } + if link.PeerNamespace != nil { + switch ns := link.PeerNamespace.(type) { + case NsPid: + val := nl.Uint32Attr(uint32(ns)) + peer.AddRtAttr(unix.IFLA_NET_NS_PID, val) + case NsFd: + val := nl.Uint32Attr(uint32(ns)) + peer.AddRtAttr(unix.IFLA_NET_NS_FD, val) + } + } + case *Vxlan: + addVxlanAttrs(link, linkInfo) + case *Bond: + addBondAttrs(link, linkInfo) + case *IPVlan: + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) + data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) + case *IPVtap: + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_IPVLAN_MODE, nl.Uint16Attr(uint16(link.Mode))) + data.AddRtAttr(nl.IFLA_IPVLAN_FLAG, nl.Uint16Attr(uint16(link.Flag))) + case *Macvlan: + if link.Mode != MACVLAN_MODE_DEFAULT { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) + } + case *Macvtap: + if link.Mode != MACVLAN_MODE_DEFAULT { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_MACVLAN_MODE, nl.Uint32Attr(macvlanModes[link.Mode])) + } + case *Geneve: + addGeneveAttrs(link, linkInfo) + case *Gretap: + addGretapAttrs(link, linkInfo) + case *Iptun: + addIptunAttrs(link, linkInfo) + case *Ip6tnl: + addIp6tnlAttrs(link, linkInfo) + case *Sittun: + addSittunAttrs(link, linkInfo) + case *Gretun: + addGretunAttrs(link, linkInfo) + case *Vti: + addVtiAttrs(link, linkInfo) + case *Vrf: + addVrfAttrs(link, linkInfo) + case *Bridge: + addBridgeAttrs(link, linkInfo) + case *GTP: + addGTPAttrs(link, linkInfo) + case *Xfrmi: + addXfrmiAttrs(link, linkInfo) + case *IPoIB: + addIPoIBAttrs(link, linkInfo) + case *BareUDP: + addBareUDPAttrs(link, linkInfo) } req.AddData(linkInfo) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) if err != nil { return err } @@ -975,13 +1549,13 @@ func (h *Handle) LinkDel(link Link) error { h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_DELLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(base.Index) req.AddData(msg) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -1024,16 +1598,19 @@ func (h *Handle) LinkByName(name string) (Link, error) { return h.linkByNameDump(name) } - req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) req.AddData(msg) - nameData := nl.NewRtAttr(syscall.IFLA_IFNAME, nl.ZeroTerminated(name)) + attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF)) + req.AddData(attr) + + nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(name)) req.AddData(nameData) link, err := execGetLink(req) - if err == syscall.EINVAL { + if err == unix.EINVAL { // older kernels don't support looking up via IFLA_IFNAME // so fall back to dumping all links h.lookupByDump = true @@ -1056,16 +1633,19 @@ func (h *Handle) LinkByAlias(alias string) (Link, error) { return h.linkByAliasDump(alias) } - req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) req.AddData(msg) - nameData := nl.NewRtAttr(syscall.IFLA_IFALIAS, nl.ZeroTerminated(alias)) + attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF)) + req.AddData(attr) + + nameData := nl.NewRtAttr(unix.IFLA_IFALIAS, nl.ZeroTerminated(alias)) req.AddData(nameData) link, err := execGetLink(req) - if err == syscall.EINVAL { + if err == unix.EINVAL { // older kernels don't support looking up via IFLA_IFALIAS // so fall back to dumping all links h.lookupByDump = true @@ -1082,20 +1662,22 @@ func LinkByIndex(index int) (Link, error) { // LinkByIndex finds a link by index and returns a pointer to the object. func (h *Handle) LinkByIndex(index int) (Link, error) { - req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) msg.Index = int32(index) req.AddData(msg) + attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF)) + req.AddData(attr) return execGetLink(req) } func execGetLink(req *nl.NetlinkRequest) (Link, error) { - msgs, err := req.Execute(syscall.NETLINK_ROUTE, 0) + msgs, err := req.Execute(unix.NETLINK_ROUTE, 0) if err != nil { if errno, ok := err.(syscall.Errno); ok { - if errno == syscall.ENODEV { + if errno == unix.ENODEV { return nil, LinkNotFoundError{fmt.Errorf("Link not found")} } } @@ -1114,9 +1696,9 @@ func execGetLink(req *nl.NetlinkRequest) (Link, error) { } } -// linkDeserialize deserializes a raw message received from netlink into +// LinkDeserialize deserializes a raw message received from netlink into // a link object. -func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { +func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { msg := nl.DeserializeIfInfomsg(m) attrs, err := nl.ParseRouteAttr(m[msg.Len():]) @@ -1124,19 +1706,33 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { return nil, err } - base := LinkAttrs{Index: int(msg.Index), RawFlags: msg.Flags, Flags: linkFlags(msg.Flags), EncapType: msg.EncapType()} - if msg.Flags&syscall.IFF_PROMISC != 0 { + base := NewLinkAttrs() + base.Index = int(msg.Index) + base.RawFlags = msg.Flags + base.Flags = linkFlags(msg.Flags) + base.EncapType = msg.EncapType() + base.NetNsID = -1 + if msg.Flags&unix.IFF_PROMISC != 0 { base.Promisc = 1 } + if msg.Flags&unix.IFF_ALLMULTI != 0 { + base.Allmulti = 1 + } + if msg.Flags&unix.IFF_MULTICAST != 0 { + base.Multi = 1 + } + var ( - link Link - stats32 []byte - stats64 []byte - linkType string + link Link + stats32 *LinkStatistics32 + stats64 *LinkStatistics64 + linkType string + linkSlave LinkSlave + slaveType string ) for _, attr := range attrs { switch attr.Attr.Type { - case syscall.IFLA_LINKINFO: + case unix.IFLA_LINKINFO: infos, err := nl.ParseRouteAttr(attr.Value) if err != nil { return nil, err @@ -1156,26 +1752,52 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { link = &Vlan{} case "veth": link = &Veth{} + case "wireguard": + link = &Wireguard{} case "vxlan": link = &Vxlan{} case "bond": link = &Bond{} case "ipvlan": link = &IPVlan{} + case "ipvtap": + link = &IPVtap{} case "macvlan": link = &Macvlan{} case "macvtap": link = &Macvtap{} + case "geneve": + link = &Geneve{} case "gretap": link = &Gretap{} + case "ip6gretap": + link = &Gretap{} case "ipip": link = &Iptun{} - case "vti": + case "ip6tnl": + link = &Ip6tnl{} + case "sit": + link = &Sittun{} + case "gre": + link = &Gretun{} + case "ip6gre": + link = &Gretun{} + case "vti", "vti6": link = &Vti{} case "vrf": link = &Vrf{} case "gtp": link = >P{} + case "xfrm": + link = &Xfrmi{} + case "tun": + link = &Tuntap{} + case "ipoib": + link = &IPoIB{} + case "can": + link = &Can{} + case "bareudp": + link = &BareUDP{} default: link = &GenericLink{LinkType: linkType} } @@ -1193,15 +1815,29 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { parseBondData(link, data) case "ipvlan": parseIPVlanData(link, data) + case "ipvtap": + parseIPVtapData(link, data) case "macvlan": parseMacvlanData(link, data) case "macvtap": parseMacvtapData(link, data) + case "geneve": + parseGeneveData(link, data) case "gretap": parseGretapData(link, data) + case "ip6gretap": + parseGretapData(link, data) case "ipip": parseIptunData(link, data) - case "vti": + case "ip6tnl": + parseIp6tnlData(link, data) + case "sit": + parseSittunData(link, data) + case "gre": + parseGretunData(link, data) + case "ip6gre": + parseGretunData(link, data) + case "vti", "vti6": parseVtiData(link, data) case "vrf": parseVrfData(link, data) @@ -1209,10 +1845,45 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { parseBridgeData(link, data) case "gtp": parseGTPData(link, data) + case "xfrm": + parseXfrmiData(link, data) + case "tun": + parseTuntapData(link, data) + case "ipoib": + parseIPoIBData(link, data) + case "can": + parseCanData(link, data) + case "bareudp": + parseBareUDPData(link, data) + } + + case nl.IFLA_INFO_SLAVE_KIND: + slaveType = string(info.Value[:len(info.Value)-1]) + switch slaveType { + case "bond": + linkSlave = &BondSlave{} + case "vrf": + linkSlave = &VrfSlave{} + } + + case nl.IFLA_INFO_SLAVE_DATA: + switch slaveType { + case "bond": + data, err := nl.ParseRouteAttr(info.Value) + if err != nil { + return nil, err + } + parseBondSlaveData(linkSlave, data) + case "vrf": + data, err := nl.ParseRouteAttr(info.Value) + if err != nil { + return nil, err + } + parseVrfSlaveData(linkSlave, data) } } } - case syscall.IFLA_ADDRESS: + case unix.IFLA_ADDRESS: var nonzero bool for _, b := range attr.Value { if b != 0 { @@ -1222,46 +1893,77 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { if nonzero { base.HardwareAddr = attr.Value[:] } - case syscall.IFLA_IFNAME: + case unix.IFLA_IFNAME: base.Name = string(attr.Value[:len(attr.Value)-1]) - case syscall.IFLA_MTU: + case unix.IFLA_MTU: base.MTU = int(native.Uint32(attr.Value[0:4])) - case syscall.IFLA_LINK: + case unix.IFLA_LINK: base.ParentIndex = int(native.Uint32(attr.Value[0:4])) - case syscall.IFLA_MASTER: + case unix.IFLA_MASTER: base.MasterIndex = int(native.Uint32(attr.Value[0:4])) - case syscall.IFLA_TXQLEN: + case unix.IFLA_TXQLEN: base.TxQLen = int(native.Uint32(attr.Value[0:4])) - case syscall.IFLA_IFALIAS: + case unix.IFLA_IFALIAS: base.Alias = string(attr.Value[:len(attr.Value)-1]) - case syscall.IFLA_STATS: - stats32 = attr.Value[:] - case IFLA_STATS64: - stats64 = attr.Value[:] - case nl.IFLA_XDP: + case unix.IFLA_STATS: + stats32 = new(LinkStatistics32) + if err := binary.Read(bytes.NewBuffer(attr.Value[:]), nl.NativeEndian(), stats32); err != nil { + return nil, err + } + case unix.IFLA_STATS64: + stats64 = new(LinkStatistics64) + if err := binary.Read(bytes.NewBuffer(attr.Value[:]), nl.NativeEndian(), stats64); err != nil { + return nil, err + } + case unix.IFLA_XDP: xdp, err := parseLinkXdp(attr.Value[:]) if err != nil { return nil, err } base.Xdp = xdp - case syscall.IFLA_PROTINFO | syscall.NLA_F_NESTED: - if hdr != nil && hdr.Type == syscall.RTM_NEWLINK && - msg.Family == syscall.AF_BRIDGE { + case unix.IFLA_PROTINFO | unix.NLA_F_NESTED: + if hdr != nil && hdr.Type == unix.RTM_NEWLINK && + msg.Family == unix.AF_BRIDGE { attrs, err := nl.ParseRouteAttr(attr.Value[:]) if err != nil { return nil, err } - base.Protinfo = parseProtinfo(attrs) + protinfo := parseProtinfo(attrs) + base.Protinfo = &protinfo } - case syscall.IFLA_OPERSTATE: + case unix.IFLA_OPERSTATE: base.OperState = LinkOperState(uint8(attr.Value[0])) + case unix.IFLA_PHYS_SWITCH_ID: + base.PhysSwitchID = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_LINK_NETNSID: + base.NetNsID = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_GSO_MAX_SIZE: + base.GSOMaxSize = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GSO_MAX_SEGS: + base.GSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_VFINFO_LIST: + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + vfs, err := parseVfInfoList(data) + if err != nil { + return nil, err + } + base.Vfs = vfs + case unix.IFLA_NUM_TX_QUEUES: + base.NumTxQueues = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_NUM_RX_QUEUES: + base.NumRxQueues = int(native.Uint32(attr.Value[0:4])) + case unix.IFLA_GROUP: + base.Group = native.Uint32(attr.Value[0:4]) } } if stats64 != nil { - base.Statistics = parseLinkStats64(stats64) + base.Statistics = (*LinkStatistics)(stats64) } else if stats32 != nil { - base.Statistics = parseLinkStats32(stats32) + base.Statistics = (*LinkStatistics)(stats32.to64()) } // Links that don't have IFLA_INFO_KIND are hardware devices @@ -1269,10 +1971,59 @@ func LinkDeserialize(hdr *syscall.NlMsghdr, m []byte) (Link, error) { link = &Device{} } *link.Attrs() = base + link.Attrs().Slave = linkSlave + + // If the tuntap attributes are not updated by netlink due to + // an older driver, use sysfs + if link != nil && linkType == "tun" { + tuntap := link.(*Tuntap) + + if tuntap.Mode == 0 { + ifname := tuntap.Attrs().Name + if flags, err := readSysPropAsInt64(ifname, "tun_flags"); err == nil { + + if flags&unix.IFF_TUN != 0 { + tuntap.Mode = unix.IFF_TUN + } else if flags&unix.IFF_TAP != 0 { + tuntap.Mode = unix.IFF_TAP + } + + tuntap.NonPersist = false + if flags&unix.IFF_PERSIST == 0 { + tuntap.NonPersist = true + } + } + + // The sysfs interface for owner/group returns -1 for root user, instead of returning 0. + // So explicitly check for negative value, before assigning the owner uid/gid. + if owner, err := readSysPropAsInt64(ifname, "owner"); err == nil && owner > 0 { + tuntap.Owner = uint32(owner) + } + + if group, err := readSysPropAsInt64(ifname, "group"); err == nil && group > 0 { + tuntap.Group = uint32(group) + } + } + } return link, nil } +func readSysPropAsInt64(ifname, prop string) (int64, error) { + fname := fmt.Sprintf("/sys/class/net/%s/%s", ifname, prop) + contents, err := ioutil.ReadFile(fname) + if err != nil { + return 0, err + } + + num, err := strconv.ParseInt(strings.TrimSpace(string(contents)), 0, 64) + if err == nil { + return num, nil + } + + return 0, err +} + // LinkList gets a list of link devices. // Equivalent to: `ip link show` func LinkList() ([]Link, error) { @@ -1284,12 +2035,14 @@ func LinkList() ([]Link, error) { func (h *Handle) LinkList() ([]Link, error) { // NOTE(vish): This duplicates functionality in net/iface_linux.go, but we need // to get the message ourselves to parse link type. - req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP) + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP) - msg := nl.NewIfInfomsg(syscall.AF_UNSPEC) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) req.AddData(msg) + attr := nl.NewRtAttr(unix.IFLA_EXT_MASK, nl.Uint32Attr(nl.RTEXT_FILTER_VF)) + req.AddData(attr) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWLINK) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK) if err != nil { return nil, err } @@ -1309,24 +2062,43 @@ func (h *Handle) LinkList() ([]Link, error) { // LinkUpdate is used to pass information back from LinkSubscribe() type LinkUpdate struct { nl.IfInfomsg - Header syscall.NlMsghdr + Header unix.NlMsghdr Link } // LinkSubscribe takes a chan down which notifications will be sent // when links change. Close the 'done' chan to stop subscription. func LinkSubscribe(ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribe(netns.None(), netns.None(), ch, done) + return linkSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) } // LinkSubscribeAt works like LinkSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func LinkSubscribeAt(ns netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error { - return linkSubscribe(ns, netns.None(), ch, done) + return linkSubscribeAt(ns, netns.None(), ch, done, nil, false) +} + +// LinkSubscribeOptions contains a set of options to use with +// LinkSubscribeWithOptions. +type LinkSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool } -func linkSubscribe(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}) error { - s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_LINK) +// LinkSubscribeWithOptions work like LinkSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func LinkSubscribeWithOptions(ch chan<- LinkUpdate, done <-chan struct{}, options LinkSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return linkSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) +} + +func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { + s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_LINK) if err != nil { return err } @@ -1336,20 +2108,57 @@ func linkSubscribe(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-cha s.Close() }() } + if listExisting { + req := pkgHandle.newNetlinkRequest(unix.RTM_GETLINK, + unix.NLM_F_DUMP) + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(msg) + if err := s.Send(req); err != nil { + return err + } + } go func() { defer close(ch) for { - msgs, err := s.Receive() + msgs, from, err := s.Receive() if err != nil { + if cberr != nil { + cberr(fmt.Errorf("Receive failed: %v", + err)) + } return } + if from.Pid != nl.PidKernel { + if cberr != nil { + cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)) + } + continue + } for _, m := range msgs { + if m.Header.Type == unix.NLMSG_DONE { + continue + } + if m.Header.Type == unix.NLMSG_ERROR { + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + continue + } + if cberr != nil { + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) + } + continue + } ifmsg := nl.DeserializeIfInfomsg(m.Data) - link, err := LinkDeserialize(&m.Header, m.Data) + header := unix.NlMsghdr(m.Header) + link, err := LinkDeserialize(&header, m.Data) if err != nil { - return + if cberr != nil { + cberr(err) + } + continue } - ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: m.Header, Link: link} + ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: header, Link: link} } } }() @@ -1424,28 +2233,86 @@ func (h *Handle) LinkSetBrProxyArpWiFi(link Link, mode bool) error { func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { base := link.Attrs() h.ensureIndex(base) - req := h.newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) - msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) + msg := nl.NewIfInfomsg(unix.AF_BRIDGE) msg.Index = int32(base.Index) req.AddData(msg) - br := nl.NewRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil) - nl.NewRtAttrChild(br, attr, boolToByte(mode)) + br := nl.NewRtAttr(unix.IFLA_PROTINFO|unix.NLA_F_NESTED, nil) + br.AddRtAttr(attr, boolToByte(mode)) req.AddData(br) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) if err != nil { return err } return nil } +// LinkSetTxQLen sets the transaction queue length for the link. +// Equivalent to: `ip link set $link txqlen $qlen` +func LinkSetTxQLen(link Link, qlen int) error { + return pkgHandle.LinkSetTxQLen(link, qlen) +} + +// LinkSetTxQLen sets the transaction queue length for the link. +// Equivalent to: `ip link set $link txqlen $qlen` +func (h *Handle) LinkSetTxQLen(link Link, qlen int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(qlen)) + + data := nl.NewRtAttr(unix.IFLA_TXQLEN, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGroup sets the link group id which can be used to perform mass actions +// with iproute2 as well use it as a reference in nft filters. +// Equivalent to: `ip link set $link group $id` +func LinkSetGroup(link Link, group int) error { + return pkgHandle.LinkSetGroup(link, group) +} + +// LinkSetGroup sets the link group id which can be used to perform mass actions +// with iproute2 as well use it as a reference in nft filters. +// Equivalent to: `ip link set $link group $id` +func (h *Handle) LinkSetGroup(link Link, group int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(group)) + + data := nl.NewRtAttr(unix.IFLA_GROUP, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { vlan := link.(*Vlan) for _, datum := range data { switch datum.Attr.Type { case nl.IFLA_VLAN_ID: vlan.VlanId = int(native.Uint16(datum.Value[0:2])) + case nl.IFLA_VLAN_PROTOCOL: + vlan.VlanProtocol = VlanProtocol(int(ntohs(datum.Value[0:2]))) } } } @@ -1453,6 +2320,13 @@ func parseVlanData(link Link, data []syscall.NetlinkRouteAttr) { func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { vxlan := link.(*Vxlan) for _, datum := range data { + // NOTE(vish): Apparently some messages can be sent with no value. + // We special case GBP here to not change existing + // functionality. It appears that GBP sends a datum.Value + // of null. + if len(datum.Value) == 0 && datum.Attr.Type != nl.IFLA_VXLAN_GBP { + continue + } switch datum.Attr.Type { case nl.IFLA_VXLAN_ID: vxlan.VxlanId = int(native.Uint32(datum.Value[0:4])) @@ -1482,6 +2356,10 @@ func parseVxlanData(link Link, data []syscall.NetlinkRouteAttr) { vxlan.L3miss = int8(datum.Value[0]) != 0 case nl.IFLA_VXLAN_UDP_CSUM: vxlan.UDPCSum = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_UDP_ZERO_CSUM6_TX: + vxlan.UDP6ZeroCSumTx = int8(datum.Value[0]) != 0 + case nl.IFLA_VXLAN_UDP_ZERO_CSUM6_RX: + vxlan.UDP6ZeroCSumRx = int8(datum.Value[0]) != 0 case nl.IFLA_VXLAN_GBP: vxlan.GBP = true case nl.IFLA_VXLAN_FLOWBASED: @@ -1523,7 +2401,7 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_BOND_ARP_INTERVAL: bond.ArpInterval = int(native.Uint32(data[i].Value[0:4])) case nl.IFLA_BOND_ARP_IP_TARGET: - // TODO: implement + bond.ArpIpTargets = parseBondArpIpTargets(data[i].Value) case nl.IFLA_BOND_ARP_VALIDATE: bond.ArpValidate = BondArpValidate(native.Uint32(data[i].Value[0:4])) case nl.IFLA_BOND_ARP_ALL_TARGETS: @@ -1547,7 +2425,7 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_BOND_LP_INTERVAL: bond.LpInterval = int(native.Uint32(data[i].Value[0:4])) case nl.IFLA_BOND_PACKETS_PER_SLAVE: - bond.PackersPerSlave = int(native.Uint32(data[i].Value[0:4])) + bond.PacketsPerSlave = int(native.Uint32(data[i].Value[0:4])) case nl.IFLA_BOND_AD_LACP_RATE: bond.LacpRate = BondLacpRate(data[i].Value[0]) case nl.IFLA_BOND_AD_SELECT: @@ -1566,12 +2444,97 @@ func parseBondData(link Link, data []syscall.NetlinkRouteAttr) { } } +func parseBondArpIpTargets(value []byte) []net.IP { + data, err := nl.ParseRouteAttr(value) + if err != nil { + return nil + } + + targets := []net.IP{} + for i := range data { + target := net.IP(data[i].Value) + if ip := target.To4(); ip != nil { + targets = append(targets, ip) + continue + } + if ip := target.To16(); ip != nil { + targets = append(targets, ip) + } + } + + return targets +} + +func addBondSlaveAttrs(bondSlave *BondSlave, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_SLAVE_DATA, nil) + + data.AddRtAttr(nl.IFLA_BOND_SLAVE_STATE, nl.Uint8Attr(uint8(bondSlave.State))) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_MII_STATUS, nl.Uint8Attr(uint8(bondSlave.MiiStatus))) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, nl.Uint32Attr(bondSlave.LinkFailureCount)) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_QUEUE_ID, nl.Uint16Attr(bondSlave.QueueId)) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, nl.Uint16Attr(bondSlave.AggregatorId)) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, nl.Uint8Attr(bondSlave.AdActorOperPortState)) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, nl.Uint16Attr(bondSlave.AdPartnerOperPortState)) + + if mac := bondSlave.PermHardwareAddr; mac != nil { + data.AddRtAttr(nl.IFLA_BOND_SLAVE_PERM_HWADDR, []byte(mac)) + } +} + +func parseBondSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) { + bondSlave := slave.(*BondSlave) + for i := range data { + switch data[i].Attr.Type { + case nl.IFLA_BOND_SLAVE_STATE: + bondSlave.State = BondSlaveState(data[i].Value[0]) + case nl.IFLA_BOND_SLAVE_MII_STATUS: + bondSlave.MiiStatus = BondSlaveMiiStatus(data[i].Value[0]) + case nl.IFLA_BOND_SLAVE_LINK_FAILURE_COUNT: + bondSlave.LinkFailureCount = native.Uint32(data[i].Value[0:4]) + case nl.IFLA_BOND_SLAVE_PERM_HWADDR: + bondSlave.PermHardwareAddr = net.HardwareAddr(data[i].Value[0:6]) + case nl.IFLA_BOND_SLAVE_QUEUE_ID: + bondSlave.QueueId = native.Uint16(data[i].Value[0:2]) + case nl.IFLA_BOND_SLAVE_AD_AGGREGATOR_ID: + bondSlave.AggregatorId = native.Uint16(data[i].Value[0:2]) + case nl.IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE: + bondSlave.AdActorOperPortState = uint8(data[i].Value[0]) + case nl.IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE: + bondSlave.AdPartnerOperPortState = native.Uint16(data[i].Value[0:2]) + } + } +} + +func parseVrfSlaveData(slave LinkSlave, data []syscall.NetlinkRouteAttr) { + vrfSlave := slave.(*VrfSlave) + for i := range data { + switch data[i].Attr.Type { + case nl.IFLA_BOND_SLAVE_STATE: + vrfSlave.Table = native.Uint32(data[i].Value[0:4]) + } + } +} + func parseIPVlanData(link Link, data []syscall.NetlinkRouteAttr) { ipv := link.(*IPVlan) for _, datum := range data { - if datum.Attr.Type == nl.IFLA_IPVLAN_MODE { + switch datum.Attr.Type { + case nl.IFLA_IPVLAN_MODE: + ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4])) + case nl.IFLA_IPVLAN_FLAG: + ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4])) + } + } +} + +func parseIPVtapData(link Link, data []syscall.NetlinkRouteAttr) { + ipv := link.(*IPVtap) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPVLAN_MODE: ipv.Mode = IPVlanMode(native.Uint32(datum.Value[0:4])) - return + case nl.IFLA_IPVLAN_FLAG: + ipv.Flag = IPVlanFlag(native.Uint32(datum.Value[0:4])) } } } @@ -1584,7 +2547,8 @@ func parseMacvtapData(link Link, data []syscall.NetlinkRouteAttr) { func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { macv := link.(*Macvlan) for _, datum := range data { - if datum.Attr.Type == nl.IFLA_MACVLAN_MODE { + switch datum.Attr.Type { + case nl.IFLA_MACVLAN_MODE: switch native.Uint32(datum.Value[0:4]) { case nl.MACVLAN_MODE_PRIVATE: macv.Mode = MACVLAN_MODE_PRIVATE @@ -1597,7 +2561,16 @@ func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { case nl.MACVLAN_MODE_SOURCE: macv.Mode = MACVLAN_MODE_SOURCE } - return + case nl.IFLA_MACVLAN_MACADDR_COUNT: + macv.MACAddrs = make([]net.HardwareAddr, 0, int(native.Uint32(datum.Value[0:4]))) + case nl.IFLA_MACVLAN_MACADDR_DATA: + macs, err := nl.ParseRouteAttr(datum.Value[:]) + if err != nil { + panic(fmt.Sprintf("failed to ParseRouteAttr for IFLA_MACVLAN_MACADDR_DATA: %v", err)) + } + for _, macDatum := range macs { + macv.MACAddrs = append(macv.MACAddrs, net.HardwareAddr(macDatum.Value[0:6])) + } } } } @@ -1605,66 +2578,123 @@ func parseMacvlanData(link Link, data []syscall.NetlinkRouteAttr) { // copied from pkg/net_linux.go func linkFlags(rawFlags uint32) net.Flags { var f net.Flags - if rawFlags&syscall.IFF_UP != 0 { + if rawFlags&unix.IFF_UP != 0 { f |= net.FlagUp } - if rawFlags&syscall.IFF_BROADCAST != 0 { + if rawFlags&unix.IFF_BROADCAST != 0 { f |= net.FlagBroadcast } - if rawFlags&syscall.IFF_LOOPBACK != 0 { + if rawFlags&unix.IFF_LOOPBACK != 0 { f |= net.FlagLoopback } - if rawFlags&syscall.IFF_POINTOPOINT != 0 { + if rawFlags&unix.IFF_POINTOPOINT != 0 { f |= net.FlagPointToPoint } - if rawFlags&syscall.IFF_MULTICAST != 0 { + if rawFlags&unix.IFF_MULTICAST != 0 { f |= net.FlagMulticast } return f } +func addGeneveAttrs(geneve *Geneve, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if geneve.FlowBased { + // In flow based mode, no other attributes need to be configured + linkInfo.AddRtAttr(nl.IFLA_GENEVE_COLLECT_METADATA, boolAttr(geneve.FlowBased)) + return + } + + if ip := geneve.Remote; ip != nil { + if ip4 := ip.To4(); ip4 != nil { + data.AddRtAttr(nl.IFLA_GENEVE_REMOTE, ip.To4()) + } else { + data.AddRtAttr(nl.IFLA_GENEVE_REMOTE6, []byte(ip)) + } + } + + if geneve.ID != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_ID, nl.Uint32Attr(geneve.ID)) + } + + if geneve.Dport != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_PORT, htons(geneve.Dport)) + } + + if geneve.Ttl != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_TTL, nl.Uint8Attr(geneve.Ttl)) + } + + if geneve.Tos != 0 { + data.AddRtAttr(nl.IFLA_GENEVE_TOS, nl.Uint8Attr(geneve.Tos)) + } +} + +func parseGeneveData(link Link, data []syscall.NetlinkRouteAttr) { + geneve := link.(*Geneve) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GENEVE_ID: + geneve.ID = native.Uint32(datum.Value[0:4]) + case nl.IFLA_GENEVE_REMOTE, nl.IFLA_GENEVE_REMOTE6: + geneve.Remote = datum.Value + case nl.IFLA_GENEVE_PORT: + geneve.Dport = ntohs(datum.Value[0:2]) + case nl.IFLA_GENEVE_TTL: + geneve.Ttl = uint8(datum.Value[0]) + case nl.IFLA_GENEVE_TOS: + geneve.Tos = uint8(datum.Value[0]) + } + } +} + func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) if gretap.FlowBased { // In flow based mode, no other attributes need to be configured - nl.NewRtAttrChild(data, nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased)) + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased)) return } - ip := gretap.Local.To4() - if ip != nil { - nl.NewRtAttrChild(data, nl.IFLA_GRE_LOCAL, []byte(ip)) + if ip := gretap.Local; ip != nil { + if ip.To4() != nil { + ip = ip.To4() + } + data.AddRtAttr(nl.IFLA_GRE_LOCAL, []byte(ip)) } - ip = gretap.Remote.To4() - if ip != nil { - nl.NewRtAttrChild(data, nl.IFLA_GRE_REMOTE, []byte(ip)) + + if ip := gretap.Remote; ip != nil { + if ip.To4() != nil { + ip = ip.To4() + } + data.AddRtAttr(nl.IFLA_GRE_REMOTE, []byte(ip)) } if gretap.IKey != 0 { - nl.NewRtAttrChild(data, nl.IFLA_GRE_IKEY, htonl(gretap.IKey)) + data.AddRtAttr(nl.IFLA_GRE_IKEY, htonl(gretap.IKey)) gretap.IFlags |= uint16(nl.GRE_KEY) } if gretap.OKey != 0 { - nl.NewRtAttrChild(data, nl.IFLA_GRE_OKEY, htonl(gretap.OKey)) + data.AddRtAttr(nl.IFLA_GRE_OKEY, htonl(gretap.OKey)) gretap.OFlags |= uint16(nl.GRE_KEY) } - nl.NewRtAttrChild(data, nl.IFLA_GRE_IFLAGS, htons(gretap.IFlags)) - nl.NewRtAttrChild(data, nl.IFLA_GRE_OFLAGS, htons(gretap.OFlags)) + data.AddRtAttr(nl.IFLA_GRE_IFLAGS, htons(gretap.IFlags)) + data.AddRtAttr(nl.IFLA_GRE_OFLAGS, htons(gretap.OFlags)) if gretap.Link != 0 { - nl.NewRtAttrChild(data, nl.IFLA_GRE_LINK, nl.Uint32Attr(gretap.Link)) + data.AddRtAttr(nl.IFLA_GRE_LINK, nl.Uint32Attr(gretap.Link)) } - nl.NewRtAttrChild(data, nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gretap.PMtuDisc)) - nl.NewRtAttrChild(data, nl.IFLA_GRE_TTL, nl.Uint8Attr(gretap.Ttl)) - nl.NewRtAttrChild(data, nl.IFLA_GRE_TOS, nl.Uint8Attr(gretap.Tos)) - nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gretap.EncapType)) - nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gretap.EncapFlags)) - nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_SPORT, htons(gretap.EncapSport)) - nl.NewRtAttrChild(data, nl.IFLA_GRE_ENCAP_DPORT, htons(gretap.EncapDport)) + data.AddRtAttr(nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gretap.PMtuDisc)) + data.AddRtAttr(nl.IFLA_GRE_TTL, nl.Uint8Attr(gretap.Ttl)) + data.AddRtAttr(nl.IFLA_GRE_TOS, nl.Uint8Attr(gretap.Tos)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gretap.EncapType)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gretap.EncapFlags)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_SPORT, htons(gretap.EncapSport)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_DPORT, htons(gretap.EncapDport)) } func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { @@ -1676,9 +2706,9 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_GRE_IKEY: gre.OKey = ntohl(datum.Value[0:4]) case nl.IFLA_GRE_LOCAL: - gre.Local = net.IP(datum.Value[0:4]) + gre.Local = net.IP(datum.Value) case nl.IFLA_GRE_REMOTE: - gre.Remote = net.IP(datum.Value[0:4]) + gre.Remote = net.IP(datum.Value) case nl.IFLA_GRE_ENCAP_SPORT: gre.EncapSport = ntohs(datum.Value[0:2]) case nl.IFLA_GRE_ENCAP_DPORT: @@ -1687,7 +2717,6 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { gre.IFlags = ntohs(datum.Value[0:2]) case nl.IFLA_GRE_OFLAGS: gre.OFlags = ntohs(datum.Value[0:2]) - case nl.IFLA_GRE_TTL: gre.Ttl = uint8(datum.Value[0]) case nl.IFLA_GRE_TOS: @@ -1699,26 +2728,98 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_GRE_ENCAP_FLAGS: gre.EncapFlags = native.Uint16(datum.Value[0:2]) case nl.IFLA_GRE_COLLECT_METADATA: - gre.FlowBased = int8(datum.Value[0]) != 0 + gre.FlowBased = true } } } -func parseLinkStats32(data []byte) *LinkStatistics { - return (*LinkStatistics)((*LinkStatistics32)(unsafe.Pointer(&data[0:SizeofLinkStats32][0])).to64()) +func addGretunAttrs(gre *Gretun, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if ip := gre.Local; ip != nil { + if ip.To4() != nil { + ip = ip.To4() + } + data.AddRtAttr(nl.IFLA_GRE_LOCAL, []byte(ip)) + } + + if ip := gre.Remote; ip != nil { + if ip.To4() != nil { + ip = ip.To4() + } + data.AddRtAttr(nl.IFLA_GRE_REMOTE, []byte(ip)) + } + + if gre.IKey != 0 { + data.AddRtAttr(nl.IFLA_GRE_IKEY, htonl(gre.IKey)) + gre.IFlags |= uint16(nl.GRE_KEY) + } + + if gre.OKey != 0 { + data.AddRtAttr(nl.IFLA_GRE_OKEY, htonl(gre.OKey)) + gre.OFlags |= uint16(nl.GRE_KEY) + } + + data.AddRtAttr(nl.IFLA_GRE_IFLAGS, htons(gre.IFlags)) + data.AddRtAttr(nl.IFLA_GRE_OFLAGS, htons(gre.OFlags)) + + if gre.Link != 0 { + data.AddRtAttr(nl.IFLA_GRE_LINK, nl.Uint32Attr(gre.Link)) + } + + data.AddRtAttr(nl.IFLA_GRE_PMTUDISC, nl.Uint8Attr(gre.PMtuDisc)) + data.AddRtAttr(nl.IFLA_GRE_TTL, nl.Uint8Attr(gre.Ttl)) + data.AddRtAttr(nl.IFLA_GRE_TOS, nl.Uint8Attr(gre.Tos)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_TYPE, nl.Uint16Attr(gre.EncapType)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_FLAGS, nl.Uint16Attr(gre.EncapFlags)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_SPORT, htons(gre.EncapSport)) + data.AddRtAttr(nl.IFLA_GRE_ENCAP_DPORT, htons(gre.EncapDport)) } -func parseLinkStats64(data []byte) *LinkStatistics { - return (*LinkStatistics)((*LinkStatistics64)(unsafe.Pointer(&data[0:SizeofLinkStats64][0]))) +func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) { + gre := link.(*Gretun) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_GRE_IKEY: + gre.IKey = ntohl(datum.Value[0:4]) + case nl.IFLA_GRE_OKEY: + gre.OKey = ntohl(datum.Value[0:4]) + case nl.IFLA_GRE_LOCAL: + gre.Local = net.IP(datum.Value) + case nl.IFLA_GRE_REMOTE: + gre.Remote = net.IP(datum.Value) + case nl.IFLA_GRE_IFLAGS: + gre.IFlags = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_OFLAGS: + gre.OFlags = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_TTL: + gre.Ttl = uint8(datum.Value[0]) + case nl.IFLA_GRE_TOS: + gre.Tos = uint8(datum.Value[0]) + case nl.IFLA_GRE_PMTUDISC: + gre.PMtuDisc = uint8(datum.Value[0]) + case nl.IFLA_GRE_ENCAP_TYPE: + gre.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_GRE_ENCAP_FLAGS: + gre.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_GRE_ENCAP_SPORT: + gre.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_ENCAP_DPORT: + gre.EncapDport = ntohs(datum.Value[0:2]) + } + } } func addXdpAttrs(xdp *LinkXdp, req *nl.NetlinkRequest) { - attrs := nl.NewRtAttr(nl.IFLA_XDP|syscall.NLA_F_NESTED, nil) + attrs := nl.NewRtAttr(unix.IFLA_XDP|unix.NLA_F_NESTED, nil) b := make([]byte, 4) native.PutUint32(b, uint32(xdp.Fd)) - nl.NewRtAttrChild(attrs, nl.IFLA_XDP_FD, b) - native.PutUint32(b, xdp.Flags) - nl.NewRtAttrChild(attrs, nl.IFLA_XDP_FLAGS, b) + attrs.AddRtAttr(nl.IFLA_XDP_FD, b) + if xdp.Flags != 0 { + b := make([]byte, 4) + native.PutUint32(b, xdp.Flags) + attrs.AddRtAttr(nl.IFLA_XDP_FLAGS, b) + } req.AddData(attrs) } @@ -1733,7 +2834,8 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) { case nl.IFLA_XDP_FD: xdp.Fd = int(native.Uint32(attr.Value[0:4])) case nl.IFLA_XDP_ATTACHED: - xdp.Attached = attr.Value[0] != 0 + xdp.AttachMode = uint32(attr.Value[0]) + xdp.Attached = xdp.AttachMode != 0 case nl.IFLA_XDP_FLAGS: xdp.Flags = native.Uint32(attr.Value[0:4]) case nl.IFLA_XDP_PROG_ID: @@ -1744,29 +2846,44 @@ func parseLinkXdp(data []byte) (*LinkXdp, error) { } func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + if iptun.FlowBased { + // In flow based mode, no other attributes need to be configured + linkInfo.AddRtAttr(nl.IFLA_IPTUN_COLLECT_METADATA, boolAttr(iptun.FlowBased)) + return + } + + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) ip := iptun.Local.To4() if ip != nil { - nl.NewRtAttrChild(data, nl.IFLA_IPTUN_LOCAL, []byte(ip)) + data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) } ip = iptun.Remote.To4() if ip != nil { - nl.NewRtAttrChild(data, nl.IFLA_IPTUN_REMOTE, []byte(ip)) + data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip)) } if iptun.Link != 0 { - nl.NewRtAttrChild(data, nl.IFLA_IPTUN_LINK, nl.Uint32Attr(iptun.Link)) + data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(iptun.Link)) } - nl.NewRtAttrChild(data, nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(iptun.PMtuDisc)) - nl.NewRtAttrChild(data, nl.IFLA_IPTUN_TTL, nl.Uint8Attr(iptun.Ttl)) - nl.NewRtAttrChild(data, nl.IFLA_IPTUN_TOS, nl.Uint8Attr(iptun.Tos)) + data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(iptun.PMtuDisc)) + data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(iptun.Ttl)) + data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(iptun.Tos)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(iptun.EncapType)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(iptun.EncapFlags)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(iptun.EncapSport)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(iptun.EncapDport)) + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(iptun.Proto)) } func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { iptun := link.(*Iptun) for _, datum := range data { + // NOTE: same with vxlan, ip tunnel may also has null datum.Value + if len(datum.Value) == 0 { + continue + } switch datum.Attr.Type { case nl.IFLA_IPTUN_LOCAL: iptun.Local = net.IP(datum.Value[0:4]) @@ -1778,29 +2895,177 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { iptun.Tos = uint8(datum.Value[0]) case nl.IFLA_IPTUN_PMTUDISC: iptun.PMtuDisc = uint8(datum.Value[0]) + case nl.IFLA_IPTUN_ENCAP_SPORT: + iptun.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_DPORT: + iptun.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_TYPE: + iptun.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_FLAGS: + iptun.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_COLLECT_METADATA: + iptun.FlowBased = true + case nl.IFLA_IPTUN_PROTO: + iptun.Proto = datum.Value[0] + } + } +} + +func addIp6tnlAttrs(ip6tnl *Ip6tnl, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if ip6tnl.Link != 0 { + data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(ip6tnl.Link)) + } + + ip := ip6tnl.Local.To16() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) + } + + ip = ip6tnl.Remote.To16() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip)) + } + + data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(ip6tnl.Ttl)) + data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(ip6tnl.Tos)) + data.AddRtAttr(nl.IFLA_IPTUN_FLAGS, nl.Uint32Attr(ip6tnl.Flags)) + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(ip6tnl.Proto)) + data.AddRtAttr(nl.IFLA_IPTUN_FLOWINFO, nl.Uint32Attr(ip6tnl.FlowInfo)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(ip6tnl.EncapLimit)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(ip6tnl.EncapType)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(ip6tnl.EncapFlags)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(ip6tnl.EncapSport)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(ip6tnl.EncapDport)) +} + +func parseIp6tnlData(link Link, data []syscall.NetlinkRouteAttr) { + ip6tnl := link.(*Ip6tnl) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPTUN_LOCAL: + ip6tnl.Local = net.IP(datum.Value[:16]) + case nl.IFLA_IPTUN_REMOTE: + ip6tnl.Remote = net.IP(datum.Value[:16]) + case nl.IFLA_IPTUN_TTL: + ip6tnl.Ttl = datum.Value[0] + case nl.IFLA_IPTUN_TOS: + ip6tnl.Tos = datum.Value[0] + case nl.IFLA_IPTUN_FLAGS: + ip6tnl.Flags = native.Uint32(datum.Value[:4]) + case nl.IFLA_IPTUN_PROTO: + ip6tnl.Proto = datum.Value[0] + case nl.IFLA_IPTUN_FLOWINFO: + ip6tnl.FlowInfo = native.Uint32(datum.Value[:4]) + case nl.IFLA_IPTUN_ENCAP_LIMIT: + ip6tnl.EncapLimit = datum.Value[0] + case nl.IFLA_IPTUN_ENCAP_TYPE: + ip6tnl.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_FLAGS: + ip6tnl.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_SPORT: + ip6tnl.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_DPORT: + ip6tnl.EncapDport = ntohs(datum.Value[0:2]) + } + } +} + +func addSittunAttrs(sittun *Sittun, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + if sittun.Link != 0 { + data.AddRtAttr(nl.IFLA_IPTUN_LINK, nl.Uint32Attr(sittun.Link)) + } + + ip := sittun.Local.To4() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_LOCAL, []byte(ip)) + } + + ip = sittun.Remote.To4() + if ip != nil { + data.AddRtAttr(nl.IFLA_IPTUN_REMOTE, []byte(ip)) + } + + if sittun.Ttl > 0 { + // Would otherwise fail on 3.10 kernel + data.AddRtAttr(nl.IFLA_IPTUN_TTL, nl.Uint8Attr(sittun.Ttl)) + } + + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(sittun.Proto)) + data.AddRtAttr(nl.IFLA_IPTUN_TOS, nl.Uint8Attr(sittun.Tos)) + data.AddRtAttr(nl.IFLA_IPTUN_PMTUDISC, nl.Uint8Attr(sittun.PMtuDisc)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_LIMIT, nl.Uint8Attr(sittun.EncapLimit)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_TYPE, nl.Uint16Attr(sittun.EncapType)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(sittun.EncapFlags)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(sittun.EncapSport)) + data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(sittun.EncapDport)) +} + +func parseSittunData(link Link, data []syscall.NetlinkRouteAttr) { + sittun := link.(*Sittun) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPTUN_LOCAL: + sittun.Local = net.IP(datum.Value[0:4]) + case nl.IFLA_IPTUN_REMOTE: + sittun.Remote = net.IP(datum.Value[0:4]) + case nl.IFLA_IPTUN_TTL: + sittun.Ttl = datum.Value[0] + case nl.IFLA_IPTUN_TOS: + sittun.Tos = datum.Value[0] + case nl.IFLA_IPTUN_PMTUDISC: + sittun.PMtuDisc = datum.Value[0] + case nl.IFLA_IPTUN_PROTO: + sittun.Proto = datum.Value[0] + case nl.IFLA_IPTUN_ENCAP_TYPE: + sittun.EncapType = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_FLAGS: + sittun.EncapFlags = native.Uint16(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_SPORT: + sittun.EncapSport = ntohs(datum.Value[0:2]) + case nl.IFLA_IPTUN_ENCAP_DPORT: + sittun.EncapDport = ntohs(datum.Value[0:2]) } } } func addVtiAttrs(vti *Vti, linkInfo *nl.RtAttr) { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + family := FAMILY_V4 + if vti.Local.To4() == nil { + family = FAMILY_V6 + } + + var ip net.IP - ip := vti.Local.To4() + if family == FAMILY_V4 { + ip = vti.Local.To4() + } else { + ip = vti.Local + } if ip != nil { - nl.NewRtAttrChild(data, nl.IFLA_VTI_LOCAL, []byte(ip)) + data.AddRtAttr(nl.IFLA_VTI_LOCAL, []byte(ip)) } - ip = vti.Remote.To4() + if family == FAMILY_V4 { + ip = vti.Remote.To4() + } else { + ip = vti.Remote + } if ip != nil { - nl.NewRtAttrChild(data, nl.IFLA_VTI_REMOTE, []byte(ip)) + data.AddRtAttr(nl.IFLA_VTI_REMOTE, []byte(ip)) } if vti.Link != 0 { - nl.NewRtAttrChild(data, nl.IFLA_VTI_LINK, nl.Uint32Attr(vti.Link)) + data.AddRtAttr(nl.IFLA_VTI_LINK, nl.Uint32Attr(vti.Link)) } - nl.NewRtAttrChild(data, nl.IFLA_VTI_IKEY, htonl(vti.IKey)) - nl.NewRtAttrChild(data, nl.IFLA_VTI_OKEY, htonl(vti.OKey)) + data.AddRtAttr(nl.IFLA_VTI_IKEY, htonl(vti.IKey)) + data.AddRtAttr(nl.IFLA_VTI_OKEY, htonl(vti.OKey)) } func parseVtiData(link Link, data []syscall.NetlinkRouteAttr) { @@ -1808,9 +3073,9 @@ func parseVtiData(link Link, data []syscall.NetlinkRouteAttr) { for _, datum := range data { switch datum.Attr.Type { case nl.IFLA_VTI_LOCAL: - vti.Local = net.IP(datum.Value[0:4]) + vti.Local = net.IP(datum.Value) case nl.IFLA_VTI_REMOTE: - vti.Remote = net.IP(datum.Value[0:4]) + vti.Remote = net.IP(datum.Value) case nl.IFLA_VTI_IKEY: vti.IKey = ntohl(datum.Value[0:4]) case nl.IFLA_VTI_OKEY: @@ -1820,10 +3085,10 @@ func parseVtiData(link Link, data []syscall.NetlinkRouteAttr) { } func addVrfAttrs(vrf *Vrf, linkInfo *nl.RtAttr) { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) b := make([]byte, 4) native.PutUint32(b, uint32(vrf.Table)) - nl.NewRtAttrChild(data, nl.IFLA_VRF_TABLE, b) + data.AddRtAttr(nl.IFLA_VRF_TABLE, b) } func parseVrfData(link Link, data []syscall.NetlinkRouteAttr) { @@ -1837,12 +3102,18 @@ func parseVrfData(link Link, data []syscall.NetlinkRouteAttr) { } func addBridgeAttrs(bridge *Bridge, linkInfo *nl.RtAttr) { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) if bridge.MulticastSnooping != nil { - nl.NewRtAttrChild(data, nl.IFLA_BR_MCAST_SNOOPING, boolToByte(*bridge.MulticastSnooping)) + data.AddRtAttr(nl.IFLA_BR_MCAST_SNOOPING, boolToByte(*bridge.MulticastSnooping)) + } + if bridge.AgeingTime != nil { + data.AddRtAttr(nl.IFLA_BR_AGEING_TIME, nl.Uint32Attr(*bridge.AgeingTime)) } if bridge.HelloTime != nil { - nl.NewRtAttrChild(data, nl.IFLA_BR_HELLO_TIME, nl.Uint32Attr(*bridge.HelloTime)) + data.AddRtAttr(nl.IFLA_BR_HELLO_TIME, nl.Uint32Attr(*bridge.HelloTime)) + } + if bridge.VlanFiltering != nil { + data.AddRtAttr(nl.IFLA_BR_VLAN_FILTERING, boolToByte(*bridge.VlanFiltering)) } } @@ -1850,23 +3121,29 @@ func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { br := bridge.(*Bridge) for _, datum := range data { switch datum.Attr.Type { + case nl.IFLA_BR_AGEING_TIME: + ageingTime := native.Uint32(datum.Value[0:4]) + br.AgeingTime = &ageingTime case nl.IFLA_BR_HELLO_TIME: helloTime := native.Uint32(datum.Value[0:4]) br.HelloTime = &helloTime case nl.IFLA_BR_MCAST_SNOOPING: mcastSnooping := datum.Value[0] == 1 br.MulticastSnooping = &mcastSnooping + case nl.IFLA_BR_VLAN_FILTERING: + vlanFiltering := datum.Value[0] == 1 + br.VlanFiltering = &vlanFiltering } } } func addGTPAttrs(gtp *GTP, linkInfo *nl.RtAttr) { - data := nl.NewRtAttrChild(linkInfo, nl.IFLA_INFO_DATA, nil) - nl.NewRtAttrChild(data, nl.IFLA_GTP_FD0, nl.Uint32Attr(uint32(gtp.FD0))) - nl.NewRtAttrChild(data, nl.IFLA_GTP_FD1, nl.Uint32Attr(uint32(gtp.FD1))) - nl.NewRtAttrChild(data, nl.IFLA_GTP_PDP_HASHSIZE, nl.Uint32Attr(131072)) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_GTP_FD0, nl.Uint32Attr(uint32(gtp.FD0))) + data.AddRtAttr(nl.IFLA_GTP_FD1, nl.Uint32Attr(uint32(gtp.FD1))) + data.AddRtAttr(nl.IFLA_GTP_PDP_HASHSIZE, nl.Uint32Attr(131072)) if gtp.Role != nl.GTP_ROLE_GGSN { - nl.NewRtAttrChild(data, nl.IFLA_GTP_ROLE, nl.Uint32Attr(uint32(gtp.Role))) + data.AddRtAttr(nl.IFLA_GTP_ROLE, nl.Uint32Attr(uint32(gtp.Role))) } } @@ -1885,3 +3162,304 @@ func parseGTPData(link Link, data []syscall.NetlinkRouteAttr) { } } } + +func parseVfInfoList(data []syscall.NetlinkRouteAttr) ([]VfInfo, error) { + var vfs []VfInfo + + for i, element := range data { + if element.Attr.Type != nl.IFLA_VF_INFO { + return nil, fmt.Errorf("Incorrect element type in vf info list: %d", element.Attr.Type) + } + vfAttrs, err := nl.ParseRouteAttr(element.Value) + if err != nil { + return nil, err + } + vfs = append(vfs, parseVfInfo(vfAttrs, i)) + } + return vfs, nil +} + +func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { + vf := VfInfo{ID: id} + for _, element := range data { + switch element.Attr.Type { + case nl.IFLA_VF_MAC: + mac := nl.DeserializeVfMac(element.Value[:]) + vf.Mac = mac.Mac[:6] + case nl.IFLA_VF_VLAN: + vl := nl.DeserializeVfVlan(element.Value[:]) + vf.Vlan = int(vl.Vlan) + vf.Qos = int(vl.Qos) + case nl.IFLA_VF_TX_RATE: + txr := nl.DeserializeVfTxRate(element.Value[:]) + vf.TxRate = int(txr.Rate) + case nl.IFLA_VF_SPOOFCHK: + sp := nl.DeserializeVfSpoofchk(element.Value[:]) + vf.Spoofchk = sp.Setting != 0 + case nl.IFLA_VF_LINK_STATE: + ls := nl.DeserializeVfLinkState(element.Value[:]) + vf.LinkState = ls.LinkState + case nl.IFLA_VF_RATE: + vfr := nl.DeserializeVfRate(element.Value[:]) + vf.MaxTxRate = vfr.MaxTxRate + vf.MinTxRate = vfr.MinTxRate + case nl.IFLA_VF_STATS: + vfstats := nl.DeserializeVfStats(element.Value[:]) + vf.RxPackets = vfstats.RxPackets + vf.TxPackets = vfstats.TxPackets + vf.RxBytes = vfstats.RxBytes + vf.TxBytes = vfstats.TxBytes + vf.Multicast = vfstats.Multicast + vf.Broadcast = vfstats.Broadcast + vf.RxDropped = vfstats.RxDropped + vf.TxDropped = vfstats.TxDropped + + case nl.IFLA_VF_RSS_QUERY_EN: + result := nl.DeserializeVfRssQueryEn(element.Value) + vf.RssQuery = result.Setting + + case nl.IFLA_VF_TRUST: + result := nl.DeserializeVfTrust(element.Value) + vf.Trust = result.Setting + } + } + return vf +} + +func addXfrmiAttrs(xfrmi *Xfrmi, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_XFRM_LINK, nl.Uint32Attr(uint32(xfrmi.ParentIndex))) + if xfrmi.Ifid != 0 { + data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid)) + } +} + +func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) { + xfrmi := link.(*Xfrmi) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_XFRM_LINK: + xfrmi.ParentIndex = int(native.Uint32(datum.Value)) + case nl.IFLA_XFRM_IF_ID: + xfrmi.Ifid = native.Uint32(datum.Value) + } + } +} + +// LinkSetBondSlave add slave to bond link via ioctl interface. +func LinkSetBondSlave(link Link, master *Bond) error { + fd, err := getSocketUDP() + if err != nil { + return err + } + defer syscall.Close(fd) + + ifreq := newIocltSlaveReq(link.Attrs().Name, master.Attrs().Name) + + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), unix.SIOCBONDENSLAVE, uintptr(unsafe.Pointer(ifreq))) + if errno != 0 { + return fmt.Errorf("Failed to enslave %q to %q, errno=%v", link.Attrs().Name, master.Attrs().Name, errno) + } + return nil +} + +// LinkSetBondSlaveQueueId modify bond slave queue-id. +func (h *Handle) LinkSetBondSlaveQueueId(link Link, queueId uint16) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + linkInfo := nl.NewRtAttr(unix.IFLA_LINKINFO, nil) + data := linkInfo.AddRtAttr(nl.IFLA_INFO_SLAVE_DATA, nil) + data.AddRtAttr(nl.IFLA_BOND_SLAVE_QUEUE_ID, nl.Uint16Attr(queueId)) + + req.AddData(linkInfo) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetBondSlaveQueueId modify bond slave queue-id. +func LinkSetBondSlaveQueueId(link Link, queueId uint16) error { + return pkgHandle.LinkSetBondSlaveQueueId(link, queueId) +} + +func vethStatsSerialize(stats ethtoolStats) ([]byte, error) { + statsSize := int(unsafe.Sizeof(stats)) + int(stats.nStats)*int(unsafe.Sizeof(uint64(0))) + b := make([]byte, 0, statsSize) + buf := bytes.NewBuffer(b) + err := binary.Write(buf, nl.NativeEndian(), stats) + return buf.Bytes()[:statsSize], err +} + +type vethEthtoolStats struct { + Cmd uint32 + NStats uint32 + Peer uint64 + // Newer kernels have XDP stats in here, but we only care + // to extract the peer ifindex here. +} + +func vethStatsDeserialize(b []byte) (vethEthtoolStats, error) { + var stats = vethEthtoolStats{} + err := binary.Read(bytes.NewReader(b), nl.NativeEndian(), &stats) + return stats, err +} + +// VethPeerIndex get veth peer index. +func VethPeerIndex(link *Veth) (int, error) { + fd, err := getSocketUDP() + if err != nil { + return -1, err + } + defer syscall.Close(fd) + + ifreq, sSet := newIocltStringSetReq(link.Name) + _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), SIOCETHTOOL, uintptr(unsafe.Pointer(ifreq))) + if errno != 0 { + return -1, fmt.Errorf("SIOCETHTOOL request for %q failed, errno=%v", link.Attrs().Name, errno) + } + + stats := ethtoolStats{ + cmd: ETHTOOL_GSTATS, + nStats: sSet.data[0], + } + + buffer, err := vethStatsSerialize(stats) + if err != nil { + return -1, err + } + + ifreq.Data = uintptr(unsafe.Pointer(&buffer[0])) + _, _, errno = syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), SIOCETHTOOL, uintptr(unsafe.Pointer(ifreq))) + if errno != 0 { + return -1, fmt.Errorf("SIOCETHTOOL request for %q failed, errno=%v", link.Attrs().Name, errno) + } + + vstats, err := vethStatsDeserialize(buffer) + if err != nil { + return -1, err + } + + return int(vstats.Peer), nil +} + +func parseTuntapData(link Link, data []syscall.NetlinkRouteAttr) { + tuntap := link.(*Tuntap) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_TUN_OWNER: + tuntap.Owner = native.Uint32(datum.Value) + case nl.IFLA_TUN_GROUP: + tuntap.Group = native.Uint32(datum.Value) + case nl.IFLA_TUN_TYPE: + tuntap.Mode = TuntapMode(uint8(datum.Value[0])) + case nl.IFLA_TUN_PERSIST: + tuntap.NonPersist = false + if uint8(datum.Value[0]) == 0 { + tuntap.NonPersist = true + } + } + } +} + +func parseIPoIBData(link Link, data []syscall.NetlinkRouteAttr) { + ipoib := link.(*IPoIB) + for _, datum := range data { + switch datum.Attr.Type { + case nl.IFLA_IPOIB_PKEY: + ipoib.Pkey = uint16(native.Uint16(datum.Value)) + case nl.IFLA_IPOIB_MODE: + ipoib.Mode = IPoIBMode(native.Uint16(datum.Value)) + case nl.IFLA_IPOIB_UMCAST: + ipoib.Umcast = uint16(native.Uint16(datum.Value)) + } + } +} + +func parseCanData(link Link, data []syscall.NetlinkRouteAttr) { + can := link.(*Can) + for _, datum := range data { + + switch datum.Attr.Type { + case nl.IFLA_CAN_BITTIMING: + can.BitRate = native.Uint32(datum.Value) + can.SamplePoint = native.Uint32(datum.Value[4:]) + can.TimeQuanta = native.Uint32(datum.Value[8:]) + can.PropagationSegment = native.Uint32(datum.Value[12:]) + can.PhaseSegment1 = native.Uint32(datum.Value[16:]) + can.PhaseSegment2 = native.Uint32(datum.Value[20:]) + can.SyncJumpWidth = native.Uint32(datum.Value[24:]) + can.BitRatePreScaler = native.Uint32(datum.Value[28:]) + case nl.IFLA_CAN_BITTIMING_CONST: + can.Name = string(datum.Value[:16]) + can.TimeSegment1Min = native.Uint32(datum.Value[16:]) + can.TimeSegment1Max = native.Uint32(datum.Value[20:]) + can.TimeSegment2Min = native.Uint32(datum.Value[24:]) + can.TimeSegment2Max = native.Uint32(datum.Value[28:]) + can.SyncJumpWidthMax = native.Uint32(datum.Value[32:]) + can.BitRatePreScalerMin = native.Uint32(datum.Value[36:]) + can.BitRatePreScalerMax = native.Uint32(datum.Value[40:]) + can.BitRatePreScalerInc = native.Uint32(datum.Value[44:]) + case nl.IFLA_CAN_CLOCK: + can.ClockFrequency = native.Uint32(datum.Value) + case nl.IFLA_CAN_STATE: + can.State = native.Uint32(datum.Value) + case nl.IFLA_CAN_CTRLMODE: + can.Mask = native.Uint32(datum.Value) + can.Flags = native.Uint32(datum.Value[4:]) + case nl.IFLA_CAN_BERR_COUNTER: + can.TxError = native.Uint16(datum.Value) + can.RxError = native.Uint16(datum.Value[2:]) + case nl.IFLA_CAN_RESTART_MS: + can.RestartMs = native.Uint32(datum.Value) + case nl.IFLA_CAN_DATA_BITTIMING_CONST: + case nl.IFLA_CAN_RESTART: + case nl.IFLA_CAN_DATA_BITTIMING: + case nl.IFLA_CAN_TERMINATION: + case nl.IFLA_CAN_TERMINATION_CONST: + case nl.IFLA_CAN_BITRATE_CONST: + case nl.IFLA_CAN_DATA_BITRATE_CONST: + case nl.IFLA_CAN_BITRATE_MAX: + } + } +} + +func addIPoIBAttrs(ipoib *IPoIB, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + data.AddRtAttr(nl.IFLA_IPOIB_PKEY, nl.Uint16Attr(uint16(ipoib.Pkey))) + data.AddRtAttr(nl.IFLA_IPOIB_MODE, nl.Uint16Attr(uint16(ipoib.Mode))) + data.AddRtAttr(nl.IFLA_IPOIB_UMCAST, nl.Uint16Attr(uint16(ipoib.Umcast))) +} + +func addBareUDPAttrs(bareudp *BareUDP, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + data.AddRtAttr(nl.IFLA_BAREUDP_PORT, nl.Uint16Attr(nl.Swap16(bareudp.Port))) + data.AddRtAttr(nl.IFLA_BAREUDP_ETHERTYPE, nl.Uint16Attr(nl.Swap16(bareudp.EtherType))) + if bareudp.SrcPortMin != 0 { + data.AddRtAttr(nl.IFLA_BAREUDP_SRCPORT_MIN, nl.Uint16Attr(bareudp.SrcPortMin)) + } + if bareudp.MultiProto { + data.AddRtAttr(nl.IFLA_BAREUDP_MULTIPROTO_MODE, []byte{}) + } +} + +func parseBareUDPData(link Link, data []syscall.NetlinkRouteAttr) { + bareudp := link.(*BareUDP) + for _, attr := range data { + switch attr.Attr.Type { + case nl.IFLA_BAREUDP_PORT: + bareudp.Port = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_ETHERTYPE: + bareudp.EtherType = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_SRCPORT_MIN: + bareudp.SrcPortMin = native.Uint16(attr.Value) + case nl.IFLA_BAREUDP_MULTIPROTO_MODE: + bareudp.MultiProto = true + } + } +} diff --git a/vendor/github.com/vishvananda/netlink/neigh.go b/vendor/github.com/vishvananda/netlink/neigh.go index 0e5eb90c9e..32d722e885 100644 --- a/vendor/github.com/vishvananda/netlink/neigh.go +++ b/vendor/github.com/vishvananda/netlink/neigh.go @@ -12,11 +12,22 @@ type Neigh struct { State int Type int Flags int + FlagsExt int IP net.IP HardwareAddr net.HardwareAddr + LLIPAddr net.IP //Used in the case of NHRP + Vlan int + VNI int + MasterIndex int } // String returns $ip/$hwaddr $label func (neigh *Neigh) String() string { return fmt.Sprintf("%s %s", neigh.IP, neigh.HardwareAddr) } + +// NeighUpdate is sent when a neighbor changes - type is RTM_NEWNEIGH or RTM_DELNEIGH. +type NeighUpdate struct { + Type uint16 + Neigh +} diff --git a/vendor/github.com/vishvananda/netlink/neigh_linux.go b/vendor/github.com/vishvananda/netlink/neigh_linux.go index f069db2573..4c1e766351 100644 --- a/vendor/github.com/vishvananda/netlink/neigh_linux.go +++ b/vendor/github.com/vishvananda/netlink/neigh_linux.go @@ -1,11 +1,14 @@ package netlink import ( + "fmt" "net" "syscall" "unsafe" "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" ) const ( @@ -18,7 +21,14 @@ const ( NDA_PORT NDA_VNI NDA_IFINDEX - NDA_MAX = NDA_IFINDEX + NDA_MASTER + NDA_LINK_NETNSID + NDA_SRC_VNI + NDA_PROTOCOL + NDA_NH_ID + NDA_FDB_EXT_ATTRS + NDA_FLAGS_EXT + NDA_MAX = NDA_FLAGS_EXT ) // Neighbor Cache Entry States. @@ -36,13 +46,22 @@ const ( // Neighbor Flags const ( - NTF_USE = 0x01 - NTF_SELF = 0x02 - NTF_MASTER = 0x04 - NTF_PROXY = 0x08 - NTF_ROUTER = 0x80 + NTF_USE = 0x01 + NTF_SELF = 0x02 + NTF_MASTER = 0x04 + NTF_PROXY = 0x08 + NTF_EXT_LEARNED = 0x10 + NTF_OFFLOADED = 0x20 + NTF_STICKY = 0x40 + NTF_ROUTER = 0x80 ) +// Extended Neighbor Flags +const ( + NTF_EXT_MANAGED = 0x00000001 +) + +// Ndmsg is for adding, removing or receiving information about a neighbor table entry type Ndmsg struct { Family uint8 Index uint32 @@ -73,7 +92,7 @@ func NeighAdd(neigh *Neigh) error { // NeighAdd will add an IP to MAC mapping to the ARP table // Equivalent to: `ip neigh add ....` func (h *Handle) NeighAdd(neigh *Neigh) error { - return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL) + return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_EXCL) } // NeighSet will add or replace an IP to MAC mapping to the ARP table @@ -85,7 +104,7 @@ func NeighSet(neigh *Neigh) error { // NeighSet will add or replace an IP to MAC mapping to the ARP table // Equivalent to: `ip neigh replace....` func (h *Handle) NeighSet(neigh *Neigh) error { - return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE) + return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_REPLACE) } // NeighAppend will append an entry to FDB @@ -97,7 +116,7 @@ func NeighAppend(neigh *Neigh) error { // NeighAppend will append an entry to FDB // Equivalent to: `bridge fdb append...` func (h *Handle) NeighAppend(neigh *Neigh) error { - return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_APPEND) + return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_APPEND) } // NeighAppend will append an entry to FDB @@ -109,7 +128,7 @@ func neighAdd(neigh *Neigh, mode int) error { // NeighAppend will append an entry to FDB // Equivalent to: `bridge fdb append...` func (h *Handle) neighAdd(neigh *Neigh, mode int) error { - req := h.newNetlinkRequest(syscall.RTM_NEWNEIGH, mode|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_NEWNEIGH, mode|unix.NLM_F_ACK) return neighHandle(neigh, req) } @@ -122,12 +141,13 @@ func NeighDel(neigh *Neigh) error { // NeighDel will delete an IP address from a link device. // Equivalent to: `ip addr del $addr dev $link` func (h *Handle) NeighDel(neigh *Neigh) error { - req := h.newNetlinkRequest(syscall.RTM_DELNEIGH, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_DELNEIGH, unix.NLM_F_ACK) return neighHandle(neigh, req) } func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { var family int + if neigh.Family > 0 { family = neigh.Family } else { @@ -151,53 +171,84 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { dstData := nl.NewRtAttr(NDA_DST, ipData) req.AddData(dstData) - if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil { + if neigh.LLIPAddr != nil { + llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4()) + req.AddData(llIPData) + } else if neigh.HardwareAddr != nil { hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr)) req.AddData(hwData) } - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + if neigh.FlagsExt != 0 { + flagsExtData := nl.NewRtAttr(NDA_FLAGS_EXT, nl.Uint32Attr(uint32(neigh.FlagsExt))) + req.AddData(flagsExtData) + } + + if neigh.Vlan != 0 { + vlanData := nl.NewRtAttr(NDA_VLAN, nl.Uint16Attr(uint16(neigh.Vlan))) + req.AddData(vlanData) + } + + if neigh.VNI != 0 { + vniData := nl.NewRtAttr(NDA_VNI, nl.Uint32Attr(uint32(neigh.VNI))) + req.AddData(vniData) + } + + if neigh.MasterIndex != 0 { + masterData := nl.NewRtAttr(NDA_MASTER, nl.Uint32Attr(uint32(neigh.MasterIndex))) + req.AddData(masterData) + } + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } -// NeighList gets a list of IP-MAC mappings in the system (ARP table). +// NeighList returns a list of IP-MAC mappings in the system (ARP table). // Equivalent to: `ip neighbor show`. // The list can be filtered by link and ip family. func NeighList(linkIndex, family int) ([]Neigh, error) { return pkgHandle.NeighList(linkIndex, family) } -// NeighProxyList gets a list of neighbor proxies in the system. +// NeighProxyList returns a list of neighbor proxies in the system. // Equivalent to: `ip neighbor show proxy`. // The list can be filtered by link and ip family. func NeighProxyList(linkIndex, family int) ([]Neigh, error) { return pkgHandle.NeighProxyList(linkIndex, family) } -// NeighList gets a list of IP-MAC mappings in the system (ARP table). +// NeighList returns a list of IP-MAC mappings in the system (ARP table). // Equivalent to: `ip neighbor show`. // The list can be filtered by link and ip family. func (h *Handle) NeighList(linkIndex, family int) ([]Neigh, error) { - return h.neighList(linkIndex, family, 0) + return h.NeighListExecute(Ndmsg{ + Family: uint8(family), + Index: uint32(linkIndex), + }) } -// NeighProxyList gets a list of neighbor proxies in the system. +// NeighProxyList returns a list of neighbor proxies in the system. // Equivalent to: `ip neighbor show proxy`. // The list can be filtered by link, ip family. func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) { - return h.neighList(linkIndex, family, NTF_PROXY) -} - -func (h *Handle) neighList(linkIndex, family, flags int) ([]Neigh, error) { - req := h.newNetlinkRequest(syscall.RTM_GETNEIGH, syscall.NLM_F_DUMP) - msg := Ndmsg{ + return h.NeighListExecute(Ndmsg{ Family: uint8(family), Index: uint32(linkIndex), - Flags: uint8(flags), - } + Flags: NTF_PROXY, + }) +} + +// NeighListExecute returns a list of neighbour entries filtered by link, ip family, flag and state. +func NeighListExecute(msg Ndmsg) ([]Neigh, error) { + return pkgHandle.NeighListExecute(msg) +} + +// NeighListExecute returns a list of neighbour entries filtered by link, ip family, flag and state. +func (h *Handle) NeighListExecute(msg Ndmsg) ([]Neigh, error) { + req := h.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP) req.AddData(&msg) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWNEIGH) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNEIGH) if err != nil { return nil, err } @@ -205,10 +256,22 @@ func (h *Handle) neighList(linkIndex, family, flags int) ([]Neigh, error) { var res []Neigh for _, m := range msgs { ndm := deserializeNdmsg(m) - if linkIndex != 0 && int(ndm.Index) != linkIndex { + if msg.Index != 0 && ndm.Index != msg.Index { // Ignore messages from other interfaces continue } + if msg.Family != 0 && ndm.Family != msg.Family { + continue + } + if msg.State != 0 && ndm.State != msg.State { + continue + } + if msg.Type != 0 && ndm.Type != msg.Type { + continue + } + if msg.Flags != 0 && ndm.Flags != msg.Flags { + continue + } neigh, err := NeighDeserialize(m) if err != nil { @@ -242,9 +305,148 @@ func NeighDeserialize(m []byte) (*Neigh, error) { case NDA_DST: neigh.IP = net.IP(attr.Value) case NDA_LLADDR: - neigh.HardwareAddr = net.HardwareAddr(attr.Value) + // BUG: Is this a bug in the netlink library? + // #define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) + // #define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + attrLen := attr.Attr.Len - unix.SizeofRtAttr + if attrLen == 4 { + neigh.LLIPAddr = net.IP(attr.Value) + } else if attrLen == 16 { + // Can be IPv6 or FireWire HWAddr + link, err := LinkByIndex(neigh.LinkIndex) + if err == nil && link.Attrs().EncapType == "tunnel6" { + neigh.IP = net.IP(attr.Value) + } else { + neigh.HardwareAddr = net.HardwareAddr(attr.Value) + } + } else { + neigh.HardwareAddr = net.HardwareAddr(attr.Value) + } + case NDA_FLAGS_EXT: + neigh.FlagsExt = int(native.Uint32(attr.Value[0:4])) + case NDA_VLAN: + neigh.Vlan = int(native.Uint16(attr.Value[0:2])) + case NDA_VNI: + neigh.VNI = int(native.Uint32(attr.Value[0:4])) + case NDA_MASTER: + neigh.MasterIndex = int(native.Uint32(attr.Value[0:4])) } } return &neigh, nil } + +// NeighSubscribe takes a chan down which notifications will be sent +// when neighbors are added or deleted. Close the 'done' chan to stop subscription. +func NeighSubscribe(ch chan<- NeighUpdate, done <-chan struct{}) error { + return neighSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) +} + +// NeighSubscribeAt works like NeighSubscribe plus it allows the caller +// to choose the network namespace in which to subscribe (ns). +func NeighSubscribeAt(ns netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}) error { + return neighSubscribeAt(ns, netns.None(), ch, done, nil, false) +} + +// NeighSubscribeOptions contains a set of options to use with +// NeighSubscribeWithOptions. +type NeighSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool +} + +// NeighSubscribeWithOptions work like NeighSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func NeighSubscribeWithOptions(ch chan<- NeighUpdate, done <-chan struct{}, options NeighSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return neighSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) +} + +func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { + s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) + makeRequest := func(family int) error { + req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, + unix.NLM_F_DUMP) + infmsg := nl.NewIfInfomsg(family) + req.AddData(infmsg) + if err := s.Send(req); err != nil { + return err + } + return nil + } + if err != nil { + return err + } + if done != nil { + go func() { + <-done + s.Close() + }() + } + if listExisting { + if err := makeRequest(unix.AF_UNSPEC); err != nil { + return err + } + // We have to wait for NLMSG_DONE before making AF_BRIDGE request + } + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + if cberr != nil { + cberr(err) + } + return + } + if from.Pid != nl.PidKernel { + if cberr != nil { + cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)) + } + continue + } + for _, m := range msgs { + if m.Header.Type == unix.NLMSG_DONE { + if listExisting { + // This will be called after handling AF_UNSPEC + // list request, we have to wait for NLMSG_DONE + // before making another request + if err := makeRequest(unix.AF_BRIDGE); err != nil { + if cberr != nil { + cberr(err) + } + return + } + listExisting = false + } + continue + } + if m.Header.Type == unix.NLMSG_ERROR { + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + continue + } + if cberr != nil { + cberr(syscall.Errno(-error)) + } + return + } + neigh, err := NeighDeserialize(m.Data) + if err != nil { + if cberr != nil { + cberr(err) + } + return + } + ch <- NeighUpdate{Type: m.Header.Type, Neigh: *neigh} + } + } + }() + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/netlink.go b/vendor/github.com/vishvananda/netlink/netlink.go index fb159526e3..9cb685dc81 100644 --- a/vendor/github.com/vishvananda/netlink/netlink.go +++ b/vendor/github.com/vishvananda/netlink/netlink.go @@ -27,7 +27,8 @@ func ParseIPNet(s string) (*net.IPNet, error) { if err != nil { return nil, err } - return &net.IPNet{IP: ip, Mask: ipNet.Mask}, nil + ipNet.IP = ip + return ipNet, nil } // NewIPNet generates an IPNet from an ip address using a netmask of 32 or 128. diff --git a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go index 2d57c16d74..98d2c0dbf3 100644 --- a/vendor/github.com/vishvananda/netlink/netlink_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/netlink_unspecified.go @@ -16,7 +16,7 @@ func LinkSetMTU(link Link, mtu int) error { return ErrNotImplemented } -func LinkSetMaster(link Link, master *Bridge) error { +func LinkSetMaster(link Link, master Link) error { return ErrNotImplemented } @@ -48,10 +48,18 @@ func LinkSetVfVlan(link Link, vf, vlan int) error { return ErrNotImplemented } +func LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { + return ErrNotImplemented +} + func LinkSetVfTxRate(link Link, vf, rate int) error { return ErrNotImplemented } +func LinkSetVfRate(link Link, vf, minRate, maxRate int) error { + return ErrNotImplemented +} + func LinkSetNoMaster(link Link) error { return ErrNotImplemented } @@ -64,6 +72,10 @@ func LinkSetXdpFd(link Link, fd int) error { return ErrNotImplemented } +func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { + return ErrNotImplemented +} + func LinkSetARPOff(link Link) error { return ErrNotImplemented } @@ -108,6 +120,10 @@ func LinkSetFlood(link Link, mode bool) error { return ErrNotImplemented } +func LinkSetTxQLen(link Link, qlen int) error { + return ErrNotImplemented +} + func LinkAdd(link Link) error { return ErrNotImplemented } @@ -148,6 +164,10 @@ func AddrAdd(link Link, addr *Addr) error { return ErrNotImplemented } +func AddrReplace(link Link, addr *Addr) error { + return ErrNotImplemented +} + func AddrDel(link Link, addr *Addr) error { return ErrNotImplemented } @@ -160,14 +180,30 @@ func RouteAdd(route *Route) error { return ErrNotImplemented } +func RouteAppend(route *Route) error { + return ErrNotImplemented +} + func RouteDel(route *Route) error { return ErrNotImplemented } +func RouteGet(destination net.IP) ([]Route, error) { + return nil, ErrNotImplemented +} + func RouteList(link Link, family int) ([]Route, error) { return nil, ErrNotImplemented } +func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { + return nil, ErrNotImplemented +} + +func RouteReplace(route *Route) error { + return ErrNotImplemented +} + func XfrmPolicyAdd(policy *XfrmPolicy) error { return ErrNotImplemented } diff --git a/vendor/github.com/vishvananda/netlink/netns_linux.go b/vendor/github.com/vishvananda/netlink/netns_linux.go new file mode 100644 index 0000000000..2eb29c7ce0 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netns_linux.go @@ -0,0 +1,141 @@ +package netlink + +// Network namespace ID functions +// +// The kernel has a weird concept called the network namespace ID. +// This is different from the file reference in proc (and any bind-mounted +// namespaces, etc.) +// +// Instead, namespaces can be assigned a numeric ID at any time. Once set, +// the ID is fixed. The ID can either be set manually by the user, or +// automatically, triggered by certain kernel actions. The most common kernel +// action that triggers namespace ID creation is moving one end of a veth pair +// in to that namespace. + +import ( + "fmt" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// These can be replaced by the values from sys/unix when it is next released. +const ( + _ = iota + NETNSA_NSID + NETNSA_PID + NETNSA_FD +) + +// GetNetNsIdByPid looks up the network namespace ID for a given pid (really thread id). +// Returns -1 if the namespace does not have an ID set. +func (h *Handle) GetNetNsIdByPid(pid int) (int, error) { + return h.getNetNsId(NETNSA_PID, uint32(pid)) +} + +// GetNetNsIdByPid looks up the network namespace ID for a given pid (really thread id). +// Returns -1 if the namespace does not have an ID set. +func GetNetNsIdByPid(pid int) (int, error) { + return pkgHandle.GetNetNsIdByPid(pid) +} + +// SetNetNSIdByPid sets the ID of the network namespace for a given pid (really thread id). +// The ID can only be set for namespaces without an ID already set. +func (h *Handle) SetNetNsIdByPid(pid, nsid int) error { + return h.setNetNsId(NETNSA_PID, uint32(pid), uint32(nsid)) +} + +// SetNetNSIdByPid sets the ID of the network namespace for a given pid (really thread id). +// The ID can only be set for namespaces without an ID already set. +func SetNetNsIdByPid(pid, nsid int) error { + return pkgHandle.SetNetNsIdByPid(pid, nsid) +} + +// GetNetNsIdByFd looks up the network namespace ID for a given fd. +// fd must be an open file descriptor to a namespace file. +// Returns -1 if the namespace does not have an ID set. +func (h *Handle) GetNetNsIdByFd(fd int) (int, error) { + return h.getNetNsId(NETNSA_FD, uint32(fd)) +} + +// GetNetNsIdByFd looks up the network namespace ID for a given fd. +// fd must be an open file descriptor to a namespace file. +// Returns -1 if the namespace does not have an ID set. +func GetNetNsIdByFd(fd int) (int, error) { + return pkgHandle.GetNetNsIdByFd(fd) +} + +// SetNetNSIdByFd sets the ID of the network namespace for a given fd. +// fd must be an open file descriptor to a namespace file. +// The ID can only be set for namespaces without an ID already set. +func (h *Handle) SetNetNsIdByFd(fd, nsid int) error { + return h.setNetNsId(NETNSA_FD, uint32(fd), uint32(nsid)) +} + +// SetNetNSIdByFd sets the ID of the network namespace for a given fd. +// fd must be an open file descriptor to a namespace file. +// The ID can only be set for namespaces without an ID already set. +func SetNetNsIdByFd(fd, nsid int) error { + return pkgHandle.SetNetNsIdByFd(fd, nsid) +} + +// getNetNsId requests the netnsid for a given type-val pair +// type should be either NETNSA_PID or NETNSA_FD +func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) { + req := h.newNetlinkRequest(unix.RTM_GETNSID, unix.NLM_F_REQUEST) + + rtgen := nl.NewRtGenMsg() + req.AddData(rtgen) + + b := make([]byte, 4) + native.PutUint32(b, val) + attr := nl.NewRtAttr(attrType, b) + req.AddData(attr) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNSID) + + if err != nil { + return 0, err + } + + for _, m := range msgs { + msg := nl.DeserializeRtGenMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return 0, err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case NETNSA_NSID: + return int(int32(native.Uint32(attr.Value))), nil + } + } + } + + return 0, fmt.Errorf("unexpected empty result") +} + +// setNetNsId sets the netnsid for a given type-val pair +// type should be either NETNSA_PID or NETNSA_FD +// The ID can only be set for namespaces without an ID already set +func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error { + req := h.newNetlinkRequest(unix.RTM_NEWNSID, unix.NLM_F_REQUEST|unix.NLM_F_ACK) + + rtgen := nl.NewRtGenMsg() + req.AddData(rtgen) + + b := make([]byte, 4) + native.PutUint32(b, val) + attr := nl.NewRtAttr(attrType, b) + req.AddData(attr) + + b1 := make([]byte, 4) + native.PutUint32(b1, newnsid) + attr1 := nl.NewRtAttr(NETNSA_NSID, b1) + req.AddData(attr1) + + _, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNSID) + return err +} diff --git a/vendor/github.com/vishvananda/netlink/netns_unspecified.go b/vendor/github.com/vishvananda/netlink/netns_unspecified.go new file mode 100644 index 0000000000..5c5899e362 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/netns_unspecified.go @@ -0,0 +1,19 @@ +// +build !linux + +package netlink + +func GetNetNsIdByPid(pid int) (int, error) { + return 0, ErrNotImplemented +} + +func SetNetNsIdByPid(pid, nsid int) error { + return ErrNotImplemented +} + +func GetNetNsIdByFd(fd int) (int, error) { + return 0, ErrNotImplemented +} + +func SetNetNsIdByFd(fd, nsid int) error { + return ErrNotImplemented +} diff --git a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go index fe362e9fa7..6bea4ed02f 100644 --- a/vendor/github.com/vishvananda/netlink/nl/addr_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/addr_linux.go @@ -1,17 +1,18 @@ package nl import ( - "syscall" "unsafe" + + "golang.org/x/sys/unix" ) type IfAddrmsg struct { - syscall.IfAddrmsg + unix.IfAddrmsg } func NewIfAddrmsg(family int) *IfAddrmsg { return &IfAddrmsg{ - IfAddrmsg: syscall.IfAddrmsg{ + IfAddrmsg: unix.IfAddrmsg{ Family: uint8(family), }, } @@ -35,15 +36,15 @@ func NewIfAddrmsg(family int) *IfAddrmsg { // SizeofIfAddrmsg = 0x8 func DeserializeIfAddrmsg(b []byte) *IfAddrmsg { - return (*IfAddrmsg)(unsafe.Pointer(&b[0:syscall.SizeofIfAddrmsg][0])) + return (*IfAddrmsg)(unsafe.Pointer(&b[0:unix.SizeofIfAddrmsg][0])) } func (msg *IfAddrmsg) Serialize() []byte { - return (*(*[syscall.SizeofIfAddrmsg]byte)(unsafe.Pointer(msg)))[:] + return (*(*[unix.SizeofIfAddrmsg]byte)(unsafe.Pointer(msg)))[:] } func (msg *IfAddrmsg) Len() int { - return syscall.SizeofIfAddrmsg + return unix.SizeofIfAddrmsg } // struct ifa_cacheinfo { @@ -53,24 +54,18 @@ func (msg *IfAddrmsg) Len() int { // __u32 tstamp; /* updated timestamp, hundredths of seconds */ // }; -const IFA_CACHEINFO = 6 -const SizeofIfaCacheInfo = 0x10 - type IfaCacheInfo struct { - IfaPrefered uint32 - IfaValid uint32 - Cstamp uint32 - Tstamp uint32 + unix.IfaCacheinfo } func (msg *IfaCacheInfo) Len() int { - return SizeofIfaCacheInfo + return unix.SizeofIfaCacheinfo } func DeserializeIfaCacheInfo(b []byte) *IfaCacheInfo { - return (*IfaCacheInfo)(unsafe.Pointer(&b[0:SizeofIfaCacheInfo][0])) + return (*IfaCacheInfo)(unsafe.Pointer(&b[0:unix.SizeofIfaCacheinfo][0])) } func (msg *IfaCacheInfo) Serialize() []byte { - return (*(*[SizeofIfaCacheInfo]byte)(unsafe.Pointer(msg)))[:] + return (*(*[unix.SizeofIfaCacheinfo]byte)(unsafe.Pointer(msg)))[:] } diff --git a/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go b/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go index 6c0d333387..34e78ba8da 100644 --- a/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/bridge_linux.go @@ -11,8 +11,8 @@ const ( /* Bridge Flags */ const ( - BRIDGE_FLAGS_MASTER = iota /* Bridge command to/from master */ - BRIDGE_FLAGS_SELF /* Bridge command to/from lowerdev */ + BRIDGE_FLAGS_MASTER = iota + 1 /* Bridge command to/from master */ + BRIDGE_FLAGS_SELF /* Bridge command to/from lowerdev */ ) /* Bridge management nested attributes diff --git a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go index 380cc5967b..183601803b 100644 --- a/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go @@ -40,9 +40,11 @@ const ( NFNETLINK_V0 = 0 ) -// #define NLA_F_NESTED (1 << 15) const ( - NLA_F_NESTED = (1 << 15) + NLA_F_NESTED uint16 = (1 << 15) // #define NLA_F_NESTED (1 << 15) + NLA_F_NET_BYTEORDER uint16 = (1 << 14) // #define NLA_F_NESTED (1 << 14) + NLA_TYPE_MASK = ^(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + NLA_ALIGNTO uint16 = 4 // #define NLA_ALIGNTO 4 ) // enum ctattr_type { @@ -76,12 +78,17 @@ const ( // __CTA_MAX // }; const ( - CTA_TUPLE_ORIG = 1 - CTA_TUPLE_REPLY = 2 - CTA_STATUS = 3 - CTA_TIMEOUT = 7 - CTA_MARK = 8 - CTA_PROTOINFO = 4 + CTA_TUPLE_ORIG = 1 + CTA_TUPLE_REPLY = 2 + CTA_STATUS = 3 + CTA_PROTOINFO = 4 + CTA_TIMEOUT = 7 + CTA_MARK = 8 + CTA_COUNTERS_ORIG = 9 + CTA_COUNTERS_REPLY = 10 + CTA_USE = 11 + CTA_ID = 12 + CTA_TIMESTAMP = 20 ) // enum ctattr_tuple { @@ -163,6 +170,29 @@ const ( CTA_PROTOINFO_TCP_FLAGS_REPLY = 5 ) +// enum ctattr_counters { +// CTA_COUNTERS_UNSPEC, +// CTA_COUNTERS_PACKETS, /* 64bit counters */ +// CTA_COUNTERS_BYTES, /* 64bit counters */ +// CTA_COUNTERS32_PACKETS, /* old 32bit counters, unused */ +// CTA_COUNTERS32_BYTES, /* old 32bit counters, unused */ +// CTA_COUNTERS_PAD, +// __CTA_COUNTERS_M +// }; +// #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) +const ( + CTA_COUNTERS_PACKETS = 1 + CTA_COUNTERS_BYTES = 2 +) + +// enum CTA TIMESTAMP TLVs +// CTA_TIMESTAMP_START /* 64bit value */ +// CTA_TIMESTAMP_STOP /* 64bit value */ +const ( + CTA_TIMESTAMP_START = 1 + CTA_TIMESTAMP_STOP = 2 +) + // /* General form of address family dependent message. // */ // struct nfgenmsg { diff --git a/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go new file mode 100644 index 0000000000..2995da492f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go @@ -0,0 +1,96 @@ +package nl + +// All the following constants are coming from: +// https://github.com/torvalds/linux/blob/master/include/uapi/linux/devlink.h + +const ( + GENL_DEVLINK_VERSION = 1 + GENL_DEVLINK_NAME = "devlink" +) + +const ( + DEVLINK_CMD_GET = 1 + DEVLINK_CMD_PORT_GET = 5 + DEVLINK_CMD_PORT_SET = 6 + DEVLINK_CMD_PORT_NEW = 7 + DEVLINK_CMD_PORT_DEL = 8 + DEVLINK_CMD_ESWITCH_GET = 29 + DEVLINK_CMD_ESWITCH_SET = 30 + DEVLINK_CMD_INFO_GET = 51 +) + +const ( + DEVLINK_ATTR_BUS_NAME = 1 + DEVLINK_ATTR_DEV_NAME = 2 + DEVLINK_ATTR_PORT_INDEX = 3 + DEVLINK_ATTR_PORT_TYPE = 4 + DEVLINK_ATTR_PORT_NETDEV_IFINDEX = 6 + DEVLINK_ATTR_PORT_NETDEV_NAME = 7 + DEVLINK_ATTR_PORT_IBDEV_NAME = 8 + DEVLINK_ATTR_ESWITCH_MODE = 25 + DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 + DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 + DEVLINK_ATTR_PORT_FLAVOUR = 77 + DEVLINK_ATTR_INFO_DRIVER_NAME = 98 + DEVLINK_ATTR_INFO_SERIAL_NUMBER = 99 + DEVLINK_ATTR_INFO_VERSION_FIXED = 100 + DEVLINK_ATTR_INFO_VERSION_RUNNING = 101 + DEVLINK_ATTR_INFO_VERSION_STORED = 102 + DEVLINK_ATTR_INFO_VERSION_NAME = 103 + DEVLINK_ATTR_INFO_VERSION_VALUE = 104 + DEVLINK_ATTR_PORT_PCI_PF_NUMBER = 127 + DEVLINK_ATTR_PORT_FUNCTION = 145 + DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150 + DEVLINK_ATTR_PORT_PCI_SF_NUMBER = 164 +) + +const ( + DEVLINK_ESWITCH_MODE_LEGACY = 0 + DEVLINK_ESWITCH_MODE_SWITCHDEV = 1 +) + +const ( + DEVLINK_ESWITCH_INLINE_MODE_NONE = 0 + DEVLINK_ESWITCH_INLINE_MODE_LINK = 1 + DEVLINK_ESWITCH_INLINE_MODE_NETWORK = 2 + DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT = 3 +) + +const ( + DEVLINK_ESWITCH_ENCAP_MODE_NONE = 0 + DEVLINK_ESWITCH_ENCAP_MODE_BASIC = 1 +) + +const ( + DEVLINK_PORT_FLAVOUR_PHYSICAL = 0 + DEVLINK_PORT_FLAVOUR_CPU = 1 + DEVLINK_PORT_FLAVOUR_DSA = 2 + DEVLINK_PORT_FLAVOUR_PCI_PF = 3 + DEVLINK_PORT_FLAVOUR_PCI_VF = 4 + DEVLINK_PORT_FLAVOUR_VIRTUAL = 5 + DEVLINK_PORT_FLAVOUR_UNUSED = 6 + DEVLINK_PORT_FLAVOUR_PCI_SF = 7 +) + +const ( + DEVLINK_PORT_TYPE_NOTSET = 0 + DEVLINK_PORT_TYPE_AUTO = 1 + DEVLINK_PORT_TYPE_ETH = 2 + DEVLINK_PORT_TYPE_IB = 3 +) + +const ( + DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR = 1 + DEVLINK_PORT_FN_ATTR_STATE = 2 + DEVLINK_PORT_FN_ATTR_OPSTATE = 3 +) + +const ( + DEVLINK_PORT_FN_STATE_INACTIVE = 0 + DEVLINK_PORT_FN_STATE_ACTIVE = 1 +) + +const ( + DEVLINK_PORT_FN_OPSTATE_DETACHED = 0 + DEVLINK_PORT_FN_OPSTATE_ATTACHED = 1 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go new file mode 100644 index 0000000000..a60b4b09d9 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go @@ -0,0 +1,222 @@ +package nl + +import ( + "strconv" + + "golang.org/x/sys/unix" +) + +const ( + /* The protocol version */ + IPSET_PROTOCOL = 6 + + /* The max length of strings including NUL: set and type identifiers */ + IPSET_MAXNAMELEN = 32 + + /* The maximum permissible comment length we will accept over netlink */ + IPSET_MAX_COMMENT_SIZE = 255 +) + +const ( + _ = iota + IPSET_CMD_PROTOCOL /* 1: Return protocol version */ + IPSET_CMD_CREATE /* 2: Create a new (empty) set */ + IPSET_CMD_DESTROY /* 3: Destroy a (empty) set */ + IPSET_CMD_FLUSH /* 4: Remove all elements from a set */ + IPSET_CMD_RENAME /* 5: Rename a set */ + IPSET_CMD_SWAP /* 6: Swap two sets */ + IPSET_CMD_LIST /* 7: List sets */ + IPSET_CMD_SAVE /* 8: Save sets */ + IPSET_CMD_ADD /* 9: Add an element to a set */ + IPSET_CMD_DEL /* 10: Delete an element from a set */ + IPSET_CMD_TEST /* 11: Test an element in a set */ + IPSET_CMD_HEADER /* 12: Get set header data only */ + IPSET_CMD_TYPE /* 13: Get set type */ +) + +/* Attributes at command level */ +const ( + _ = iota + IPSET_ATTR_PROTOCOL /* 1: Protocol version */ + IPSET_ATTR_SETNAME /* 2: Name of the set */ + IPSET_ATTR_TYPENAME /* 3: Typename */ + IPSET_ATTR_REVISION /* 4: Settype revision */ + IPSET_ATTR_FAMILY /* 5: Settype family */ + IPSET_ATTR_FLAGS /* 6: Flags at command level */ + IPSET_ATTR_DATA /* 7: Nested attributes */ + IPSET_ATTR_ADT /* 8: Multiple data containers */ + IPSET_ATTR_LINENO /* 9: Restore lineno */ + IPSET_ATTR_PROTOCOL_MIN /* 10: Minimal supported version number */ + + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME /* Setname at rename/swap */ + IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN /* type rev min */ +) + +/* CADT specific attributes */ +const ( + IPSET_ATTR_IP = 1 + IPSET_ATTR_IP_FROM = 1 + IPSET_ATTR_IP_TO = 2 + IPSET_ATTR_CIDR = 3 + IPSET_ATTR_PORT = 4 + IPSET_ATTR_PORT_FROM = 4 + IPSET_ATTR_PORT_TO = 5 + IPSET_ATTR_TIMEOUT = 6 + IPSET_ATTR_PROTO = 7 + IPSET_ATTR_CADT_FLAGS = 8 + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO /* 9 */ + IPSET_ATTR_MARK = 10 + IPSET_ATTR_MARKMASK = 11 + + /* Reserve empty slots */ + IPSET_ATTR_CADT_MAX = 16 + + /* Create-only specific attributes */ + IPSET_ATTR_GC = 3 + iota + IPSET_ATTR_HASHSIZE + IPSET_ATTR_MAXELEM + IPSET_ATTR_NETMASK + IPSET_ATTR_PROBES + IPSET_ATTR_RESIZE + IPSET_ATTR_SIZE + + /* Kernel-only */ + IPSET_ATTR_ELEMENTS + IPSET_ATTR_REFERENCES + IPSET_ATTR_MEMSIZE + + SET_ATTR_CREATE_MAX +) + +/* ADT specific attributes */ +const ( + IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + iota + 1 + IPSET_ATTR_NAME + IPSET_ATTR_NAMEREF + IPSET_ATTR_IP2 + IPSET_ATTR_CIDR2 + IPSET_ATTR_IP2_TO + IPSET_ATTR_IFACE + IPSET_ATTR_BYTES + IPSET_ATTR_PACKETS + IPSET_ATTR_COMMENT + IPSET_ATTR_SKBMARK + IPSET_ATTR_SKBPRIO + IPSET_ATTR_SKBQUEUE +) + +/* Flags at CADT attribute level, upper half of cmdattrs */ +const ( + IPSET_FLAG_BIT_BEFORE = 0 + IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE) + IPSET_FLAG_BIT_PHYSDEV = 1 + IPSET_FLAG_PHYSDEV = (1 << IPSET_FLAG_BIT_PHYSDEV) + IPSET_FLAG_BIT_NOMATCH = 2 + IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH) + IPSET_FLAG_BIT_WITH_COUNTERS = 3 + IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS) + IPSET_FLAG_BIT_WITH_COMMENT = 4 + IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT) + IPSET_FLAG_BIT_WITH_FORCEADD = 5 + IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD) + IPSET_FLAG_BIT_WITH_SKBINFO = 6 + IPSET_FLAG_WITH_SKBINFO = (1 << IPSET_FLAG_BIT_WITH_SKBINFO) + IPSET_FLAG_CADT_MAX = 15 +) + +const ( + IPSET_ERR_PRIVATE = 4096 + iota + IPSET_ERR_PROTOCOL + IPSET_ERR_FIND_TYPE + IPSET_ERR_MAX_SETS + IPSET_ERR_BUSY + IPSET_ERR_EXIST_SETNAME2 + IPSET_ERR_TYPE_MISMATCH + IPSET_ERR_EXIST + IPSET_ERR_INVALID_CIDR + IPSET_ERR_INVALID_NETMASK + IPSET_ERR_INVALID_FAMILY + IPSET_ERR_TIMEOUT + IPSET_ERR_REFERENCED + IPSET_ERR_IPADDR_IPV4 + IPSET_ERR_IPADDR_IPV6 + IPSET_ERR_COUNTER + IPSET_ERR_COMMENT + IPSET_ERR_INVALID_MARKMASK + IPSET_ERR_SKBINFO + + /* Type specific error codes */ + IPSET_ERR_TYPE_SPECIFIC = 4352 +) + +type IPSetError uintptr + +func (e IPSetError) Error() string { + switch int(e) { + case IPSET_ERR_PRIVATE: + return "private" + case IPSET_ERR_PROTOCOL: + return "invalid protocol" + case IPSET_ERR_FIND_TYPE: + return "invalid type" + case IPSET_ERR_MAX_SETS: + return "max sets reached" + case IPSET_ERR_BUSY: + return "busy" + case IPSET_ERR_EXIST_SETNAME2: + return "exist_setname2" + case IPSET_ERR_TYPE_MISMATCH: + return "type mismatch" + case IPSET_ERR_EXIST: + return "exist" + case IPSET_ERR_INVALID_CIDR: + return "invalid cidr" + case IPSET_ERR_INVALID_NETMASK: + return "invalid netmask" + case IPSET_ERR_INVALID_FAMILY: + return "invalid family" + case IPSET_ERR_TIMEOUT: + return "timeout" + case IPSET_ERR_REFERENCED: + return "referenced" + case IPSET_ERR_IPADDR_IPV4: + return "invalid ipv4 address" + case IPSET_ERR_IPADDR_IPV6: + return "invalid ipv6 address" + case IPSET_ERR_COUNTER: + return "invalid counter" + case IPSET_ERR_COMMENT: + return "invalid comment" + case IPSET_ERR_INVALID_MARKMASK: + return "invalid markmask" + case IPSET_ERR_SKBINFO: + return "skbinfo" + default: + return "errno " + strconv.Itoa(int(e)) + } +} + +func GetIpsetFlags(cmd int) int { + switch cmd { + case IPSET_CMD_CREATE: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_CREATE + case IPSET_CMD_DESTROY, + IPSET_CMD_FLUSH, + IPSET_CMD_RENAME, + IPSET_CMD_SWAP, + IPSET_CMD_TEST: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK + case IPSET_CMD_LIST, + IPSET_CMD_SAVE: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK | unix.NLM_F_ROOT | unix.NLM_F_MATCH | unix.NLM_F_DUMP + case IPSET_CMD_ADD, + IPSET_CMD_DEL: + return unix.NLM_F_REQUEST | unix.NLM_F_ACK + case IPSET_CMD_HEADER, + IPSET_CMD_TYPE, + IPSET_CMD_PROTOCOL: + return unix.NLM_F_REQUEST + default: + return 0 + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/vendor/github.com/vishvananda/netlink/nl/link_linux.go index e4a192f8ea..e10edbc09d 100644 --- a/vendor/github.com/vishvananda/netlink/nl/link_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -1,35 +1,13 @@ package nl import ( - "syscall" + "bytes" + "encoding/binary" "unsafe" ) const ( DEFAULT_CHANGE = 0xFFFFFFFF - // doesn't exist in syscall - IFLA_VFINFO_LIST = syscall.IFLA_IFALIAS + 1 + iota - IFLA_STATS64 - IFLA_VF_PORTS - IFLA_PORT_SELF - IFLA_AF_SPEC - IFLA_GROUP - IFLA_NET_NS_FD - IFLA_EXT_MASK - IFLA_PROMISCUITY - IFLA_NUM_TX_QUEUES - IFLA_NUM_RX_QUEUES - IFLA_CARRIER - IFLA_PHYS_PORT_ID - IFLA_CARRIER_CHANGES - IFLA_PHYS_SWITCH_ID - IFLA_LINK_NETNSID - IFLA_PHYS_PORT_NAME - IFLA_PROTO_DOWN - IFLA_GSO_MAX_SEGS - IFLA_GSO_MAX_SIZE - IFLA_PAD - IFLA_XDP ) const ( @@ -37,7 +15,9 @@ const ( IFLA_INFO_KIND IFLA_INFO_DATA IFLA_INFO_XSTATS - IFLA_INFO_MAX = IFLA_INFO_XSTATS + IFLA_INFO_SLAVE_KIND + IFLA_INFO_SLAVE_DATA + IFLA_INFO_MAX = IFLA_INFO_SLAVE_DATA ) const ( @@ -111,13 +91,18 @@ const ( const ( IFLA_IPVLAN_UNSPEC = iota IFLA_IPVLAN_MODE - IFLA_IPVLAN_MAX = IFLA_IPVLAN_MODE + IFLA_IPVLAN_FLAG + IFLA_IPVLAN_MAX = IFLA_IPVLAN_FLAG ) const ( IFLA_MACVLAN_UNSPEC = iota IFLA_MACVLAN_MODE IFLA_MACVLAN_FLAGS + IFLA_MACVLAN_MACADDR_MODE + IFLA_MACVLAN_MACADDR + IFLA_MACVLAN_MACADDR_DATA + IFLA_MACVLAN_MACADDR_COUNT IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS ) @@ -129,6 +114,13 @@ const ( MACVLAN_MODE_SOURCE = 16 ) +const ( + MACVLAN_MACADDR_ADD = iota + MACVLAN_MACADDR_DEL + MACVLAN_MACADDR_FLUSH + MACVLAN_MACADDR_SET +) + const ( IFLA_BOND_UNSPEC = iota IFLA_BOND_MODE @@ -177,6 +169,24 @@ const ( IFLA_BOND_SLAVE_PERM_HWADDR IFLA_BOND_SLAVE_QUEUE_ID IFLA_BOND_SLAVE_AD_AGGREGATOR_ID + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE +) + +const ( + IFLA_GENEVE_UNSPEC = iota + IFLA_GENEVE_ID // vni + IFLA_GENEVE_REMOTE + IFLA_GENEVE_TTL + IFLA_GENEVE_TOS + IFLA_GENEVE_PORT // destination port + IFLA_GENEVE_COLLECT_METADATA + IFLA_GENEVE_REMOTE6 + IFLA_GENEVE_UDP_CSUM + IFLA_GENEVE_UDP_ZERO_CSUM6_TX + IFLA_GENEVE_UDP_ZERO_CSUM6_RX + IFLA_GENEVE_LABEL + IFLA_GENEVE_MAX = IFLA_GENEVE_LABEL ) const ( @@ -230,9 +240,11 @@ const ( IFLA_VF_RSS_QUERY_EN /* RSS Redirection Table and Hash Key query * on/off switch */ - IFLA_VF_STATS /* network device statistics */ - IFLA_VF_TRUST /* Trust state of VF */ - IFLA_VF_MAX = IFLA_VF_TRUST + IFLA_VF_STATS /* network device statistics */ + IFLA_VF_TRUST /* Trust state of VF */ + IFLA_VF_IB_NODE_GUID /* VF Infiniband node GUID */ + IFLA_VF_IB_PORT_GUID /* VF Infiniband port GUID */ + IFLA_VF_MAX = IFLA_VF_IB_PORT_GUID ) const ( @@ -249,7 +261,9 @@ const ( IFLA_VF_STATS_TX_BYTES IFLA_VF_STATS_BROADCAST IFLA_VF_STATS_MULTICAST - IFLA_VF_STATS_MAX = IFLA_VF_STATS_MULTICAST + IFLA_VF_STATS_RX_DROPPED + IFLA_VF_STATS_TX_DROPPED + IFLA_VF_STATS_MAX = IFLA_VF_STATS_TX_DROPPED ) const ( @@ -261,6 +275,7 @@ const ( SizeofVfLinkState = 0x08 SizeofVfRssQueryEn = 0x08 SizeofVfTrust = 0x08 + SizeofVfGUID = 0x10 ) // struct ifla_vf_mac { @@ -331,6 +346,59 @@ func (msg *VfTxRate) Serialize() []byte { return (*(*[SizeofVfTxRate]byte)(unsafe.Pointer(msg)))[:] } +//struct ifla_vf_stats { +// __u64 rx_packets; +// __u64 tx_packets; +// __u64 rx_bytes; +// __u64 tx_bytes; +// __u64 broadcast; +// __u64 multicast; +//}; + +type VfStats struct { + RxPackets uint64 + TxPackets uint64 + RxBytes uint64 + TxBytes uint64 + Multicast uint64 + Broadcast uint64 + RxDropped uint64 + TxDropped uint64 +} + +func DeserializeVfStats(b []byte) VfStats { + var vfstat VfStats + stats, err := ParseRouteAttr(b) + if err != nil { + return vfstat + } + var valueVar uint64 + for _, stat := range stats { + if err := binary.Read(bytes.NewBuffer(stat.Value), NativeEndian(), &valueVar); err != nil { + break + } + switch stat.Attr.Type { + case IFLA_VF_STATS_RX_PACKETS: + vfstat.RxPackets = valueVar + case IFLA_VF_STATS_TX_PACKETS: + vfstat.TxPackets = valueVar + case IFLA_VF_STATS_RX_BYTES: + vfstat.RxBytes = valueVar + case IFLA_VF_STATS_TX_BYTES: + vfstat.TxBytes = valueVar + case IFLA_VF_STATS_MULTICAST: + vfstat.Multicast = valueVar + case IFLA_VF_STATS_BROADCAST: + vfstat.Broadcast = valueVar + case IFLA_VF_STATS_RX_DROPPED: + vfstat.RxDropped = valueVar + case IFLA_VF_STATS_TX_DROPPED: + vfstat.TxDropped = valueVar + } + } + return vfstat +} + // struct ifla_vf_rate { // __u32 vf; // __u32 min_tx_rate; /* Min Bandwidth in Mbps */ @@ -443,6 +511,37 @@ func (msg *VfTrust) Serialize() []byte { return (*(*[SizeofVfTrust]byte)(unsafe.Pointer(msg)))[:] } +// struct ifla_vf_guid { +// __u32 vf; +// __u32 rsvd; +// __u64 guid; +// }; + +type VfGUID struct { + Vf uint32 + Rsvd uint32 + GUID uint64 +} + +func (msg *VfGUID) Len() int { + return SizeofVfGUID +} + +func DeserializeVfGUID(b []byte) *VfGUID { + return (*VfGUID)(unsafe.Pointer(&b[0:SizeofVfGUID][0])) +} + +func (msg *VfGUID) Serialize() []byte { + return (*(*[SizeofVfGUID]byte)(unsafe.Pointer(msg)))[:] +} + +const ( + XDP_FLAGS_UPDATE_IF_NOEXIST = 1 << iota + XDP_FLAGS_SKB_MODE + XDP_FLAGS_DRV_MODE + XDP_FLAGS_MASK = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE +) + const ( IFLA_XDP_UNSPEC = iota IFLA_XDP_FD /* fd of xdp program to attach, or -1 to remove */ @@ -452,6 +551,14 @@ const ( IFLA_XDP_MAX = IFLA_XDP_PROG_ID ) +// XDP program attach mode (used as dump value for IFLA_XDP_ATTACHED) +const ( + XDP_ATTACHED_NONE = iota + XDP_ATTACHED_DRV + XDP_ATTACHED_SKB + XDP_ATTACHED_HW +) + const ( IFLA_IPTUN_UNSPEC = iota IFLA_IPTUN_LINK @@ -468,7 +575,12 @@ const ( IFLA_IPTUN_6RD_RELAY_PREFIX IFLA_IPTUN_6RD_PREFIXLEN IFLA_IPTUN_6RD_RELAY_PREFIXLEN - IFLA_IPTUN_MAX = IFLA_IPTUN_6RD_RELAY_PREFIXLEN + IFLA_IPTUN_ENCAP_TYPE + IFLA_IPTUN_ENCAP_FLAGS + IFLA_IPTUN_ENCAP_SPORT + IFLA_IPTUN_ENCAP_DPORT + IFLA_IPTUN_COLLECT_METADATA + IFLA_IPTUN_MAX = IFLA_IPTUN_COLLECT_METADATA ) const ( @@ -547,3 +659,62 @@ const ( GTP_ROLE_GGSN = iota GTP_ROLE_SGSN ) + +const ( + IFLA_XFRM_UNSPEC = iota + IFLA_XFRM_LINK + IFLA_XFRM_IF_ID + + IFLA_XFRM_MAX = iota - 1 +) + +const ( + IFLA_TUN_UNSPEC = iota + IFLA_TUN_OWNER + IFLA_TUN_GROUP + IFLA_TUN_TYPE + IFLA_TUN_PI + IFLA_TUN_VNET_HDR + IFLA_TUN_PERSIST + IFLA_TUN_MULTI_QUEUE + IFLA_TUN_NUM_QUEUES + IFLA_TUN_NUM_DISABLED_QUEUES + IFLA_TUN_MAX = IFLA_TUN_NUM_DISABLED_QUEUES +) + +const ( + IFLA_IPOIB_UNSPEC = iota + IFLA_IPOIB_PKEY + IFLA_IPOIB_MODE + IFLA_IPOIB_UMCAST + IFLA_IPOIB_MAX = IFLA_IPOIB_UMCAST +) + +const ( + IFLA_CAN_UNSPEC = iota + IFLA_CAN_BITTIMING + IFLA_CAN_BITTIMING_CONST + IFLA_CAN_CLOCK + IFLA_CAN_STATE + IFLA_CAN_CTRLMODE + IFLA_CAN_RESTART_MS + IFLA_CAN_RESTART + IFLA_CAN_BERR_COUNTER + IFLA_CAN_DATA_BITTIMING + IFLA_CAN_DATA_BITTIMING_CONST + IFLA_CAN_TERMINATION + IFLA_CAN_TERMINATION_CONST + IFLA_CAN_BITRATE_CONST + IFLA_CAN_DATA_BITRATE_CONST + IFLA_CAN_BITRATE_MAX + IFLA_CAN_MAX = IFLA_CAN_BITRATE_MAX +) + +const ( + IFLA_BAREUDP_UNSPEC = iota + IFLA_BAREUDP_PORT + IFLA_BAREUDP_ETHERTYPE + IFLA_BAREUDP_SRCPORT_MIN + IFLA_BAREUDP_MULTIPROTO_MODE + IFLA_BAREUDP_MAX = IFLA_BAREUDP_MULTIPROTO_MODE +) diff --git a/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go b/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go new file mode 100644 index 0000000000..bafd593c4f --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go @@ -0,0 +1,29 @@ +package nl + +const ( + LWT_BPF_PROG_UNSPEC = iota + LWT_BPF_PROG_FD + LWT_BPF_PROG_NAME + __LWT_BPF_PROG_MAX +) + +const ( + LWT_BPF_PROG_MAX = __LWT_BPF_PROG_MAX - 1 +) + +const ( + LWT_BPF_UNSPEC = iota + LWT_BPF_IN + LWT_BPF_OUT + LWT_BPF_XMIT + LWT_BPF_XMIT_HEADROOM + __LWT_BPF_MAX +) + +const ( + LWT_BPF_MAX = __LWT_BPF_MAX - 1 +) + +const ( + LWT_BPF_MAX_HEADROOM = 256 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go index 1329acd864..600b942b17 100644 --- a/vendor/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -13,21 +13,35 @@ import ( "unsafe" "github.com/vishvananda/netns" + "golang.org/x/sys/unix" ) const ( // Family type definitions - FAMILY_ALL = syscall.AF_UNSPEC - FAMILY_V4 = syscall.AF_INET - FAMILY_V6 = syscall.AF_INET6 - FAMILY_MPLS = AF_MPLS + FAMILY_ALL = unix.AF_UNSPEC + FAMILY_V4 = unix.AF_INET + FAMILY_V6 = unix.AF_INET6 + FAMILY_MPLS = unix.AF_MPLS + // Arbitrary set value (greater than default 4k) to allow receiving + // from kernel more verbose messages e.g. for statistics, + // tc rules or filters, or other more memory requiring data. + RECEIVE_BUFFER_SIZE = 65536 + // Kernel netlink pid + PidKernel uint32 = 0 + SizeofCnMsgOp = 0x18 ) // SupportedNlFamilies contains the list of netlink families this netlink package supports -var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM, syscall.NETLINK_NETFILTER} +var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETLINK_NETFILTER} var nextSeqNr uint32 +// Default netlink socket timeout, 60s +var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0} + +// ErrorMessageReporting is the default error message reporting configuration for the new netlink sockets +var EnableErrorMessageReporting bool = false + // GetIPFamily returns the family type of a net.IP. func GetIPFamily(ip net.IP) int { if len(ip) <= net.IPv4len { @@ -41,7 +55,7 @@ func GetIPFamily(ip net.IP) int { var nativeEndian binary.ByteOrder -// Get native endianness for the system +// NativeEndian gets native endianness for the system func NativeEndian() binary.ByteOrder { if nativeEndian == nil { var x uint32 = 0x01020304 @@ -70,168 +84,226 @@ func Swap32(i uint32) uint32 { return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24 } +const ( + NLMSGERR_ATTR_UNUSED = 0 + NLMSGERR_ATTR_MSG = 1 + NLMSGERR_ATTR_OFFS = 2 + NLMSGERR_ATTR_COOKIE = 3 + NLMSGERR_ATTR_POLICY = 4 +) + type NetlinkRequestData interface { Len() int Serialize() []byte } +const ( + PROC_CN_MCAST_LISTEN = 1 + PROC_CN_MCAST_IGNORE +) + +type CbID struct { + Idx uint32 + Val uint32 +} + +type CnMsg struct { + ID CbID + Seq uint32 + Ack uint32 + Length uint16 + Flags uint16 +} + +type CnMsgOp struct { + CnMsg + // here we differ from the C header + Op uint32 +} + +func NewCnMsg(idx, val, op uint32) *CnMsgOp { + var cm CnMsgOp + + cm.ID.Idx = idx + cm.ID.Val = val + + cm.Ack = 0 + cm.Seq = 1 + cm.Length = uint16(binary.Size(op)) + cm.Op = op + + return &cm +} + +func (msg *CnMsgOp) Serialize() []byte { + return (*(*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg)))[:] +} + +func DeserializeCnMsgOp(b []byte) *CnMsgOp { + return (*CnMsgOp)(unsafe.Pointer(&b[0:SizeofCnMsgOp][0])) +} + +func (msg *CnMsgOp) Len() int { + return SizeofCnMsgOp +} + // IfInfomsg is related to links, but it is used for list requests as well type IfInfomsg struct { - syscall.IfInfomsg + unix.IfInfomsg } // Create an IfInfomsg with family specified func NewIfInfomsg(family int) *IfInfomsg { return &IfInfomsg{ - IfInfomsg: syscall.IfInfomsg{ + IfInfomsg: unix.IfInfomsg{ Family: uint8(family), }, } } func DeserializeIfInfomsg(b []byte) *IfInfomsg { - return (*IfInfomsg)(unsafe.Pointer(&b[0:syscall.SizeofIfInfomsg][0])) + return (*IfInfomsg)(unsafe.Pointer(&b[0:unix.SizeofIfInfomsg][0])) } func (msg *IfInfomsg) Serialize() []byte { - return (*(*[syscall.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:] + return (*(*[unix.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:] } func (msg *IfInfomsg) Len() int { - return syscall.SizeofIfInfomsg + return unix.SizeofIfInfomsg } func (msg *IfInfomsg) EncapType() string { switch msg.Type { case 0: return "generic" - case syscall.ARPHRD_ETHER: + case unix.ARPHRD_ETHER: return "ether" - case syscall.ARPHRD_EETHER: + case unix.ARPHRD_EETHER: return "eether" - case syscall.ARPHRD_AX25: + case unix.ARPHRD_AX25: return "ax25" - case syscall.ARPHRD_PRONET: + case unix.ARPHRD_PRONET: return "pronet" - case syscall.ARPHRD_CHAOS: + case unix.ARPHRD_CHAOS: return "chaos" - case syscall.ARPHRD_IEEE802: + case unix.ARPHRD_IEEE802: return "ieee802" - case syscall.ARPHRD_ARCNET: + case unix.ARPHRD_ARCNET: return "arcnet" - case syscall.ARPHRD_APPLETLK: + case unix.ARPHRD_APPLETLK: return "atalk" - case syscall.ARPHRD_DLCI: + case unix.ARPHRD_DLCI: return "dlci" - case syscall.ARPHRD_ATM: + case unix.ARPHRD_ATM: return "atm" - case syscall.ARPHRD_METRICOM: + case unix.ARPHRD_METRICOM: return "metricom" - case syscall.ARPHRD_IEEE1394: + case unix.ARPHRD_IEEE1394: return "ieee1394" - case syscall.ARPHRD_INFINIBAND: + case unix.ARPHRD_INFINIBAND: return "infiniband" - case syscall.ARPHRD_SLIP: + case unix.ARPHRD_SLIP: return "slip" - case syscall.ARPHRD_CSLIP: + case unix.ARPHRD_CSLIP: return "cslip" - case syscall.ARPHRD_SLIP6: + case unix.ARPHRD_SLIP6: return "slip6" - case syscall.ARPHRD_CSLIP6: + case unix.ARPHRD_CSLIP6: return "cslip6" - case syscall.ARPHRD_RSRVD: + case unix.ARPHRD_RSRVD: return "rsrvd" - case syscall.ARPHRD_ADAPT: + case unix.ARPHRD_ADAPT: return "adapt" - case syscall.ARPHRD_ROSE: + case unix.ARPHRD_ROSE: return "rose" - case syscall.ARPHRD_X25: + case unix.ARPHRD_X25: return "x25" - case syscall.ARPHRD_HWX25: + case unix.ARPHRD_HWX25: return "hwx25" - case syscall.ARPHRD_PPP: + case unix.ARPHRD_PPP: return "ppp" - case syscall.ARPHRD_HDLC: + case unix.ARPHRD_HDLC: return "hdlc" - case syscall.ARPHRD_LAPB: + case unix.ARPHRD_LAPB: return "lapb" - case syscall.ARPHRD_DDCMP: + case unix.ARPHRD_DDCMP: return "ddcmp" - case syscall.ARPHRD_RAWHDLC: + case unix.ARPHRD_RAWHDLC: return "rawhdlc" - case syscall.ARPHRD_TUNNEL: + case unix.ARPHRD_TUNNEL: return "ipip" - case syscall.ARPHRD_TUNNEL6: + case unix.ARPHRD_TUNNEL6: return "tunnel6" - case syscall.ARPHRD_FRAD: + case unix.ARPHRD_FRAD: return "frad" - case syscall.ARPHRD_SKIP: + case unix.ARPHRD_SKIP: return "skip" - case syscall.ARPHRD_LOOPBACK: + case unix.ARPHRD_LOOPBACK: return "loopback" - case syscall.ARPHRD_LOCALTLK: + case unix.ARPHRD_LOCALTLK: return "ltalk" - case syscall.ARPHRD_FDDI: + case unix.ARPHRD_FDDI: return "fddi" - case syscall.ARPHRD_BIF: + case unix.ARPHRD_BIF: return "bif" - case syscall.ARPHRD_SIT: + case unix.ARPHRD_SIT: return "sit" - case syscall.ARPHRD_IPDDP: + case unix.ARPHRD_IPDDP: return "ip/ddp" - case syscall.ARPHRD_IPGRE: + case unix.ARPHRD_IPGRE: return "gre" - case syscall.ARPHRD_PIMREG: + case unix.ARPHRD_PIMREG: return "pimreg" - case syscall.ARPHRD_HIPPI: + case unix.ARPHRD_HIPPI: return "hippi" - case syscall.ARPHRD_ASH: + case unix.ARPHRD_ASH: return "ash" - case syscall.ARPHRD_ECONET: + case unix.ARPHRD_ECONET: return "econet" - case syscall.ARPHRD_IRDA: + case unix.ARPHRD_IRDA: return "irda" - case syscall.ARPHRD_FCPP: + case unix.ARPHRD_FCPP: return "fcpp" - case syscall.ARPHRD_FCAL: + case unix.ARPHRD_FCAL: return "fcal" - case syscall.ARPHRD_FCPL: + case unix.ARPHRD_FCPL: return "fcpl" - case syscall.ARPHRD_FCFABRIC: + case unix.ARPHRD_FCFABRIC: return "fcfb0" - case syscall.ARPHRD_FCFABRIC + 1: + case unix.ARPHRD_FCFABRIC + 1: return "fcfb1" - case syscall.ARPHRD_FCFABRIC + 2: + case unix.ARPHRD_FCFABRIC + 2: return "fcfb2" - case syscall.ARPHRD_FCFABRIC + 3: + case unix.ARPHRD_FCFABRIC + 3: return "fcfb3" - case syscall.ARPHRD_FCFABRIC + 4: + case unix.ARPHRD_FCFABRIC + 4: return "fcfb4" - case syscall.ARPHRD_FCFABRIC + 5: + case unix.ARPHRD_FCFABRIC + 5: return "fcfb5" - case syscall.ARPHRD_FCFABRIC + 6: + case unix.ARPHRD_FCFABRIC + 6: return "fcfb6" - case syscall.ARPHRD_FCFABRIC + 7: + case unix.ARPHRD_FCFABRIC + 7: return "fcfb7" - case syscall.ARPHRD_FCFABRIC + 8: + case unix.ARPHRD_FCFABRIC + 8: return "fcfb8" - case syscall.ARPHRD_FCFABRIC + 9: + case unix.ARPHRD_FCFABRIC + 9: return "fcfb9" - case syscall.ARPHRD_FCFABRIC + 10: + case unix.ARPHRD_FCFABRIC + 10: return "fcfb10" - case syscall.ARPHRD_FCFABRIC + 11: + case unix.ARPHRD_FCFABRIC + 11: return "fcfb11" - case syscall.ARPHRD_FCFABRIC + 12: + case unix.ARPHRD_FCFABRIC + 12: return "fcfb12" - case syscall.ARPHRD_IEEE802_TR: + case unix.ARPHRD_IEEE802_TR: return "tr" - case syscall.ARPHRD_IEEE80211: + case unix.ARPHRD_IEEE80211: return "ieee802.11" - case syscall.ARPHRD_IEEE80211_PRISM: + case unix.ARPHRD_IEEE80211_PRISM: return "ieee802.11/prism" - case syscall.ARPHRD_IEEE80211_RADIOTAP: + case unix.ARPHRD_IEEE80211_RADIOTAP: return "ieee802.11/radiotap" - case syscall.ARPHRD_IEEE802154: + case unix.ARPHRD_IEEE802154: return "ieee802.15.4" case 65534: @@ -242,8 +314,14 @@ func (msg *IfInfomsg) EncapType() string { return fmt.Sprintf("unknown%d", msg.Type) } +// Round the length of a netlink message up to align it properly. +// Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license. +func nlmAlignOf(msglen int) int { + return (msglen + syscall.NLMSG_ALIGNTO - 1) & ^(syscall.NLMSG_ALIGNTO - 1) +} + func rtaAlignOf(attrlen int) int { - return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1) + return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1) } func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { @@ -252,9 +330,32 @@ func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { return msg } +type Uint32Attribute struct { + Type uint16 + Value uint32 +} + +func (a *Uint32Attribute) Serialize() []byte { + native := NativeEndian() + buf := make([]byte, rtaAlignOf(8)) + native.PutUint16(buf[0:2], 8) + native.PutUint16(buf[2:4], a.Type) + + if a.Type&NLA_F_NET_BYTEORDER != 0 { + binary.BigEndian.PutUint32(buf[4:], a.Value) + } else { + native.PutUint32(buf[4:], a.Value) + } + return buf +} + +func (a *Uint32Attribute) Len() int { + return 8 +} + // Extend RtAttr to handle data and children type RtAttr struct { - syscall.RtAttr + unix.RtAttr Data []byte children []NetlinkRequestData } @@ -262,7 +363,7 @@ type RtAttr struct { // Create a new Extended RtAttr object func NewRtAttr(attrType int, data []byte) *RtAttr { return &RtAttr{ - RtAttr: syscall.RtAttr{ + RtAttr: unix.RtAttr{ Type: uint16(attrType), }, children: []NetlinkRequestData{}, @@ -270,23 +371,35 @@ func NewRtAttr(attrType int, data []byte) *RtAttr { } } -// Create a new RtAttr obj anc add it as a child of an existing object +// NewRtAttrChild adds an RtAttr as a child to the parent and returns the new attribute +// +// Deprecated: Use AddRtAttr() on the parent object func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr { + return parent.AddRtAttr(attrType, data) +} + +// AddRtAttr adds an RtAttr as a child and returns the new attribute +func (a *RtAttr) AddRtAttr(attrType int, data []byte) *RtAttr { attr := NewRtAttr(attrType, data) - parent.children = append(parent.children, attr) + a.children = append(a.children, attr) return attr } +// AddChild adds an existing NetlinkRequestData as a child. +func (a *RtAttr) AddChild(attr NetlinkRequestData) { + a.children = append(a.children, attr) +} + func (a *RtAttr) Len() int { if len(a.children) == 0 { - return (syscall.SizeofRtAttr + len(a.Data)) + return (unix.SizeofRtAttr + len(a.Data)) } l := 0 for _, child := range a.children { l += rtaAlignOf(child.Len()) } - l += syscall.SizeofRtAttr + l += unix.SizeofRtAttr return rtaAlignOf(l + len(a.Data)) } @@ -319,7 +432,7 @@ func (a *RtAttr) Serialize() []byte { } type NetlinkRequest struct { - syscall.NlMsghdr + unix.NlMsghdr Data []NetlinkRequestData RawData []byte Sockets map[int]*SocketHandle @@ -327,7 +440,7 @@ type NetlinkRequest struct { // Serialize the Netlink Request into a byte array func (req *NetlinkRequest) Serialize() []byte { - length := syscall.SizeofNlMsghdr + length := unix.SizeofNlMsghdr dataBytes := make([][]byte, len(req.Data)) for i, data := range req.Data { dataBytes[i] = data.Serialize() @@ -337,8 +450,8 @@ func (req *NetlinkRequest) Serialize() []byte { req.Len = uint32(length) b := make([]byte, length) - hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:] - next := syscall.SizeofNlMsghdr + hdr := (*(*[unix.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:] + next := unix.SizeofNlMsghdr copy(b[0:next], hdr) for _, data := range dataBytes { for _, dataByte := range data { @@ -354,16 +467,12 @@ func (req *NetlinkRequest) Serialize() []byte { } func (req *NetlinkRequest) AddData(data NetlinkRequestData) { - if data != nil { - req.Data = append(req.Data, data) - } + req.Data = append(req.Data, data) } // AddRawData adds raw bytes to the end of the NetlinkRequest object during serialization func (req *NetlinkRequest) AddRawData(data []byte) { - if data != nil { - req.RawData = append(req.RawData, data...) - } + req.RawData = append(req.RawData, data...) } // Execute the request against a the given sockType. @@ -388,6 +497,19 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro if err != nil { return nil, err } + + if err := s.SetSendTimeout(&SocketTimeoutTv); err != nil { + return nil, err + } + if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil { + return nil, err + } + if EnableErrorMessageReporting { + if err := s.SetExtAck(true); err != nil { + return nil, err + } + } + defer s.Close() } else { s.Lock() @@ -407,10 +529,13 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro done: for { - msgs, err := s.Receive() + msgs, from, err := s.Receive() if err != nil { return nil, err } + if from.Pid != PidKernel { + return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, PidKernel) + } for _, m := range msgs { if m.Header.Seq != req.Seq { if sharedSocket { @@ -419,24 +544,47 @@ done: return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq) } if m.Header.Pid != pid { - return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid) - } - if m.Header.Type == syscall.NLMSG_DONE { - break done + continue } - if m.Header.Type == syscall.NLMSG_ERROR { + if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR { native := NativeEndian() - error := int32(native.Uint32(m.Data[0:4])) - if error == 0 { + errno := int32(native.Uint32(m.Data[0:4])) + if errno == 0 { break done } - return nil, syscall.Errno(-error) + var err error + err = syscall.Errno(-errno) + + unreadData := m.Data[4:] + if m.Header.Flags|unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr { + // Skip the echoed request message. + echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0])) + unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):] + + // Annotate `err` using nlmsgerr attributes. + for len(unreadData) >= syscall.SizeofRtAttr { + attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0])) + attrData := unreadData[syscall.SizeofRtAttr:attr.Len] + + switch attr.Type { + case NLMSGERR_ATTR_MSG: + err = fmt.Errorf("%w: %s", err, string(attrData)) + + default: + // TODO: handle other NLMSGERR_ATTR types + } + + unreadData = unreadData[rtaAlignOf(int(attr.Len)):] + } + } + + return nil, err } if resType != 0 && m.Header.Type != resType { continue } res = append(res, m.Data) - if m.Header.Flags&syscall.NLM_F_MULTI == 0 { + if m.Header.Flags&unix.NLM_F_MULTI == 0 { break done } } @@ -449,10 +597,10 @@ done: // the message is serialized func NewNetlinkRequest(proto, flags int) *NetlinkRequest { return &NetlinkRequest{ - NlMsghdr: syscall.NlMsghdr{ - Len: uint32(syscall.SizeofNlMsghdr), + NlMsghdr: unix.NlMsghdr{ + Len: uint32(unix.SizeofNlMsghdr), Type: uint16(proto), - Flags: syscall.NLM_F_REQUEST | uint16(flags), + Flags: unix.NLM_F_REQUEST | uint16(flags), Seq: atomic.AddUint32(&nextSeqNr, 1), }, } @@ -460,21 +608,21 @@ func NewNetlinkRequest(proto, flags int) *NetlinkRequest { type NetlinkSocket struct { fd int32 - lsa syscall.SockaddrNetlink + lsa unix.SockaddrNetlink sync.Mutex } func getNetlinkSocket(protocol int) (*NetlinkSocket, error) { - fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW|syscall.SOCK_CLOEXEC, protocol) + fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, protocol) if err != nil { return nil, err } s := &NetlinkSocket{ fd: int32(fd), } - s.lsa.Family = syscall.AF_NETLINK - if err := syscall.Bind(fd, &s.lsa); err != nil { - syscall.Close(fd) + s.lsa.Family = unix.AF_NETLINK + if err := unix.Bind(fd, &s.lsa); err != nil { + unix.Close(fd) return nil, err } @@ -551,21 +699,21 @@ func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) { // Returns the netlink socket on which Receive() method can be called // to retrieve the messages from the kernel. func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) { - fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, protocol) + fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, protocol) if err != nil { return nil, err } s := &NetlinkSocket{ fd: int32(fd), } - s.lsa.Family = syscall.AF_NETLINK + s.lsa.Family = unix.AF_NETLINK for _, g := range groups { s.lsa.Groups |= (1 << (g - 1)) } - if err := syscall.Bind(fd, &s.lsa); err != nil { - syscall.Close(fd) + if err := unix.Bind(fd, &s.lsa); err != nil { + unix.Close(fd) return nil, err } @@ -586,7 +734,7 @@ func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*Ne func (s *NetlinkSocket) Close() { fd := int(atomic.SwapInt32(&s.fd, -1)) - syscall.Close(fd) + unix.Close(fd) } func (s *NetlinkSocket) GetFd() int { @@ -598,37 +746,71 @@ func (s *NetlinkSocket) Send(request *NetlinkRequest) error { if fd < 0 { return fmt.Errorf("Send called on a closed socket") } - if err := syscall.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil { + if err := unix.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil { return err } return nil } -func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) { +func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, *unix.SockaddrNetlink, error) { fd := int(atomic.LoadInt32(&s.fd)) if fd < 0 { - return nil, fmt.Errorf("Receive called on a closed socket") + return nil, nil, fmt.Errorf("Receive called on a closed socket") } - rb := make([]byte, syscall.Getpagesize()) - nr, _, err := syscall.Recvfrom(fd, rb, 0) + var fromAddr *unix.SockaddrNetlink + var rb [RECEIVE_BUFFER_SIZE]byte + nr, from, err := unix.Recvfrom(fd, rb[:], 0) if err != nil { - return nil, err + return nil, nil, err + } + fromAddr, ok := from.(*unix.SockaddrNetlink) + if !ok { + return nil, nil, fmt.Errorf("Error converting to netlink sockaddr") + } + if nr < unix.NLMSG_HDRLEN { + return nil, nil, fmt.Errorf("Got short response from netlink") } - if nr < syscall.NLMSG_HDRLEN { - return nil, fmt.Errorf("Got short response from netlink") + rb2 := make([]byte, nr) + copy(rb2, rb[:nr]) + nl, err := syscall.ParseNetlinkMessage(rb2) + if err != nil { + return nil, nil, err } - rb = rb[:nr] - return syscall.ParseNetlinkMessage(rb) + return nl, fromAddr, nil +} + +// SetSendTimeout allows to set a send timeout on the socket +func (s *NetlinkSocket) SetSendTimeout(timeout *unix.Timeval) error { + // Set a send timeout of SOCKET_SEND_TIMEOUT, this will allow the Send to periodically unblock and avoid that a routine + // remains stuck on a send on a closed fd + return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_SNDTIMEO, timeout) +} + +// SetReceiveTimeout allows to set a receive timeout on the socket +func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error { + // Set a read timeout of SOCKET_READ_TIMEOUT, this will allow the Read to periodically unblock and avoid that a routine + // remains stuck on a recvmsg on a closed fd + return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout) +} + +// SetExtAck requests error messages to be reported on the socket +func (s *NetlinkSocket) SetExtAck(enable bool) error { + var enableN int + if enable { + enableN = 1 + } + + return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN) } func (s *NetlinkSocket) GetPid() (uint32, error) { fd := int(atomic.LoadInt32(&s.fd)) - lsa, err := syscall.Getsockname(fd) + lsa, err := unix.Getsockname(fd) if err != nil { return 0, err } switch v := lsa.(type) { - case *syscall.SockaddrNetlink: + case *unix.SockaddrNetlink: return v.Pid, nil } return 0, fmt.Errorf("Wrong socket type") @@ -683,24 +865,24 @@ func Uint64Attr(v uint64) []byte { func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) { var attrs []syscall.NetlinkRouteAttr - for len(b) >= syscall.SizeofRtAttr { + for len(b) >= unix.SizeofRtAttr { a, vbuf, alen, err := netlinkRouteAttrAndValue(b) if err != nil { return nil, err } - ra := syscall.NetlinkRouteAttr{Attr: *a, Value: vbuf[:int(a.Len)-syscall.SizeofRtAttr]} + ra := syscall.NetlinkRouteAttr{Attr: syscall.RtAttr(*a), Value: vbuf[:int(a.Len)-unix.SizeofRtAttr]} attrs = append(attrs, ra) b = b[alen:] } return attrs, nil } -func netlinkRouteAttrAndValue(b []byte) (*syscall.RtAttr, []byte, int, error) { - a := (*syscall.RtAttr)(unsafe.Pointer(&b[0])) - if int(a.Len) < syscall.SizeofRtAttr || int(a.Len) > len(b) { - return nil, nil, 0, syscall.EINVAL +func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) { + a := (*unix.RtAttr)(unsafe.Pointer(&b[0])) + if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) { + return nil, nil, 0, unix.EINVAL } - return a, b[syscall.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil + return a, b[unix.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil } // SocketHandle contains the netlink socket and the associated diff --git a/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go b/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go new file mode 100644 index 0000000000..7f49125cff --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/parse_attr_linux.go @@ -0,0 +1,79 @@ +package nl + +import ( + "encoding/binary" + "fmt" + "log" +) + +type Attribute struct { + Type uint16 + Value []byte +} + +func ParseAttributes(data []byte) <-chan Attribute { + native := NativeEndian() + result := make(chan Attribute) + + go func() { + i := 0 + for i+4 < len(data) { + length := int(native.Uint16(data[i : i+2])) + attrType := native.Uint16(data[i+2 : i+4]) + + if length < 4 { + log.Printf("attribute 0x%02x has invalid length of %d bytes", attrType, length) + break + } + + if len(data) < i+length { + log.Printf("attribute 0x%02x of length %d is truncated, only %d bytes remaining", attrType, length, len(data)-i) + break + } + + result <- Attribute{ + Type: attrType, + Value: data[i+4 : i+length], + } + i += rtaAlignOf(length) + } + close(result) + }() + + return result +} + +func PrintAttributes(data []byte) { + printAttributes(data, 0) +} + +func printAttributes(data []byte, level int) { + for attr := range ParseAttributes(data) { + for i := 0; i < level; i++ { + print("> ") + } + nested := attr.Type&NLA_F_NESTED != 0 + fmt.Printf("type=%d nested=%v len=%v %v\n", attr.Type&NLA_TYPE_MASK, nested, len(attr.Value), attr.Value) + if nested { + printAttributes(attr.Value, level+1) + } + } +} + +// Uint32 returns the uint32 value respecting the NET_BYTEORDER flag +func (attr *Attribute) Uint32() uint32 { + if attr.Type&NLA_F_NET_BYTEORDER != 0 { + return binary.BigEndian.Uint32(attr.Value) + } else { + return NativeEndian().Uint32(attr.Value) + } +} + +// Uint64 returns the uint64 value respecting the NET_BYTEORDER flag +func (attr *Attribute) Uint64() uint64 { + if attr.Type&NLA_F_NET_BYTEORDER != 0 { + return binary.BigEndian.Uint64(attr.Value) + } else { + return NativeEndian().Uint64(attr.Value) + } +} diff --git a/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go b/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go new file mode 100644 index 0000000000..ce43ee1550 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go @@ -0,0 +1,39 @@ +package nl + +const ( + RDMA_NL_GET_CLIENT_SHIFT = 10 +) + +const ( + RDMA_NL_NLDEV = 5 +) + +const ( + RDMA_NLDEV_CMD_GET = 1 + RDMA_NLDEV_CMD_SET = 2 + RDMA_NLDEV_CMD_NEWLINK = 3 + RDMA_NLDEV_CMD_DELLINK = 4 + RDMA_NLDEV_CMD_SYS_GET = 6 + RDMA_NLDEV_CMD_SYS_SET = 7 +) + +const ( + RDMA_NLDEV_ATTR_DEV_INDEX = 1 + RDMA_NLDEV_ATTR_DEV_NAME = 2 + RDMA_NLDEV_ATTR_PORT_INDEX = 3 + RDMA_NLDEV_ATTR_CAP_FLAGS = 4 + RDMA_NLDEV_ATTR_FW_VERSION = 5 + RDMA_NLDEV_ATTR_NODE_GUID = 6 + RDMA_NLDEV_ATTR_SYS_IMAGE_GUID = 7 + RDMA_NLDEV_ATTR_SUBNET_PREFIX = 8 + RDMA_NLDEV_ATTR_LID = 9 + RDMA_NLDEV_ATTR_SM_LID = 10 + RDMA_NLDEV_ATTR_LMC = 11 + RDMA_NLDEV_ATTR_PORT_STATE = 12 + RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13 + RDMA_NLDEV_ATTR_DEV_NODE_TYPE = 14 + RDMA_NLDEV_ATTR_NDEV_NAME = 51 + RDMA_NLDEV_ATTR_LINK_TYPE = 65 + RDMA_NLDEV_SYS_ATTR_NETNS_MODE = 66 + RDMA_NLDEV_NET_NS_FD = 68 +) diff --git a/vendor/github.com/vishvananda/netlink/nl/route_linux.go b/vendor/github.com/vishvananda/netlink/nl/route_linux.go index 1a064d65d2..03c1900ffa 100644 --- a/vendor/github.com/vishvananda/netlink/nl/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/route_linux.go @@ -1,65 +1,66 @@ package nl import ( - "syscall" "unsafe" + + "golang.org/x/sys/unix" ) type RtMsg struct { - syscall.RtMsg + unix.RtMsg } func NewRtMsg() *RtMsg { return &RtMsg{ - RtMsg: syscall.RtMsg{ - Table: syscall.RT_TABLE_MAIN, - Scope: syscall.RT_SCOPE_UNIVERSE, - Protocol: syscall.RTPROT_BOOT, - Type: syscall.RTN_UNICAST, + RtMsg: unix.RtMsg{ + Table: unix.RT_TABLE_MAIN, + Scope: unix.RT_SCOPE_UNIVERSE, + Protocol: unix.RTPROT_BOOT, + Type: unix.RTN_UNICAST, }, } } func NewRtDelMsg() *RtMsg { return &RtMsg{ - RtMsg: syscall.RtMsg{ - Table: syscall.RT_TABLE_MAIN, - Scope: syscall.RT_SCOPE_NOWHERE, + RtMsg: unix.RtMsg{ + Table: unix.RT_TABLE_MAIN, + Scope: unix.RT_SCOPE_NOWHERE, }, } } func (msg *RtMsg) Len() int { - return syscall.SizeofRtMsg + return unix.SizeofRtMsg } func DeserializeRtMsg(b []byte) *RtMsg { - return (*RtMsg)(unsafe.Pointer(&b[0:syscall.SizeofRtMsg][0])) + return (*RtMsg)(unsafe.Pointer(&b[0:unix.SizeofRtMsg][0])) } func (msg *RtMsg) Serialize() []byte { - return (*(*[syscall.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:] + return (*(*[unix.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:] } type RtNexthop struct { - syscall.RtNexthop + unix.RtNexthop Children []NetlinkRequestData } func DeserializeRtNexthop(b []byte) *RtNexthop { - return (*RtNexthop)(unsafe.Pointer(&b[0:syscall.SizeofRtNexthop][0])) + return (*RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0])) } func (msg *RtNexthop) Len() int { if len(msg.Children) == 0 { - return syscall.SizeofRtNexthop + return unix.SizeofRtNexthop } l := 0 for _, child := range msg.Children { l += rtaAlignOf(child.Len()) } - l += syscall.SizeofRtNexthop + l += unix.SizeofRtNexthop return rtaAlignOf(l) } @@ -67,8 +68,8 @@ func (msg *RtNexthop) Serialize() []byte { length := msg.Len() msg.RtNexthop.Len = uint16(length) buf := make([]byte, length) - copy(buf, (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:]) - next := rtaAlignOf(syscall.SizeofRtNexthop) + copy(buf, (*(*[unix.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:]) + next := rtaAlignOf(unix.SizeofRtNexthop) if len(msg.Children) > 0 { for _, child := range msg.Children { childBuf := child.Serialize() @@ -78,3 +79,29 @@ func (msg *RtNexthop) Serialize() []byte { } return buf } + +type RtGenMsg struct { + unix.RtGenmsg +} + +func NewRtGenMsg() *RtGenMsg { + return &RtGenMsg{ + RtGenmsg: unix.RtGenmsg{ + Family: unix.AF_UNSPEC, + }, + } +} + +func (msg *RtGenMsg) Len() int { + return rtaAlignOf(unix.SizeofRtGenmsg) +} + +func DeserializeRtGenMsg(b []byte) *RtGenMsg { + return &RtGenMsg{RtGenmsg: unix.RtGenmsg{Family: b[0]}} +} + +func (msg *RtGenMsg) Serialize() []byte { + out := make([]byte, msg.Len()) + out[0] = msg.Family + return out +} diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go new file mode 100644 index 0000000000..fe88285f20 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go @@ -0,0 +1,154 @@ +package nl + +import ( + "errors" + "fmt" + "net" +) + +type IPv6SrHdr struct { + nextHdr uint8 + hdrLen uint8 + routingType uint8 + segmentsLeft uint8 + firstSegment uint8 + flags uint8 + reserved uint16 + + Segments []net.IP +} + +func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool { + if len(s1.Segments) != len(s2.Segments) { + return false + } + for i := range s1.Segments { + if !s1.Segments[i].Equal(s2.Segments[i]) { + return false + } + } + return s1.nextHdr == s2.nextHdr && + s1.hdrLen == s2.hdrLen && + s1.routingType == s2.routingType && + s1.segmentsLeft == s2.segmentsLeft && + s1.firstSegment == s2.firstSegment && + s1.flags == s2.flags + // reserved doesn't need to be identical. +} + +// seg6 encap mode +const ( + SEG6_IPTUN_MODE_INLINE = iota + SEG6_IPTUN_MODE_ENCAP +) + +// number of nested RTATTR +// from include/uapi/linux/seg6_iptunnel.h +const ( + SEG6_IPTUNNEL_UNSPEC = iota + SEG6_IPTUNNEL_SRH + __SEG6_IPTUNNEL_MAX +) +const ( + SEG6_IPTUNNEL_MAX = __SEG6_IPTUNNEL_MAX - 1 +) + +func EncodeSEG6Encap(mode int, segments []net.IP) ([]byte, error) { + nsegs := len(segments) // nsegs: number of segments + if nsegs == 0 { + return nil, errors.New("EncodeSEG6Encap: No Segment in srh") + } + b := make([]byte, 12, 12+len(segments)*16) + native := NativeEndian() + native.PutUint32(b, uint32(mode)) + b[4] = 0 // srh.nextHdr (0 when calling netlink) + b[5] = uint8(16 * nsegs >> 3) // srh.hdrLen (in 8-octets unit) + b[6] = IPV6_SRCRT_TYPE_4 // srh.routingType (assigned by IANA) + b[7] = uint8(nsegs - 1) // srh.segmentsLeft + b[8] = uint8(nsegs - 1) // srh.firstSegment + b[9] = 0 // srh.flags (SR6_FLAG1_HMAC for srh_hmac) + // srh.reserved: Defined as "Tag" in draft-ietf-6man-segment-routing-header-07 + native.PutUint16(b[10:], 0) // srh.reserved + for _, netIP := range segments { + b = append(b, netIP...) // srh.Segments + } + return b, nil +} + +func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) { + native := NativeEndian() + mode := int(native.Uint32(buf)) + srh := IPv6SrHdr{ + nextHdr: buf[4], + hdrLen: buf[5], + routingType: buf[6], + segmentsLeft: buf[7], + firstSegment: buf[8], + flags: buf[9], + reserved: native.Uint16(buf[10:12]), + } + buf = buf[12:] + if len(buf)%16 != 0 { + err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf)) + return mode, nil, err + } + for len(buf) > 0 { + srh.Segments = append(srh.Segments, net.IP(buf[:16])) + buf = buf[16:] + } + return mode, srh.Segments, nil +} + +func DecodeSEG6Srh(buf []byte) ([]net.IP, error) { + native := NativeEndian() + srh := IPv6SrHdr{ + nextHdr: buf[0], + hdrLen: buf[1], + routingType: buf[2], + segmentsLeft: buf[3], + firstSegment: buf[4], + flags: buf[5], + reserved: native.Uint16(buf[6:8]), + } + buf = buf[8:] + if len(buf)%16 != 0 { + err := fmt.Errorf("DecodeSEG6Srh: error parsing Segment List (buf len: %d)", len(buf)) + return nil, err + } + for len(buf) > 0 { + srh.Segments = append(srh.Segments, net.IP(buf[:16])) + buf = buf[16:] + } + return srh.Segments, nil +} +func EncodeSEG6Srh(segments []net.IP) ([]byte, error) { + nsegs := len(segments) // nsegs: number of segments + if nsegs == 0 { + return nil, errors.New("EncodeSEG6Srh: No Segments") + } + b := make([]byte, 8, 8+len(segments)*16) + native := NativeEndian() + b[0] = 0 // srh.nextHdr (0 when calling netlink) + b[1] = uint8(16 * nsegs >> 3) // srh.hdrLen (in 8-octets unit) + b[2] = IPV6_SRCRT_TYPE_4 // srh.routingType (assigned by IANA) + b[3] = uint8(nsegs - 1) // srh.segmentsLeft + b[4] = uint8(nsegs - 1) // srh.firstSegment + b[5] = 0 // srh.flags (SR6_FLAG1_HMAC for srh_hmac) + // srh.reserved: Defined as "Tag" in draft-ietf-6man-segment-routing-header-07 + native.PutUint16(b[6:], 0) // srh.reserved + for _, netIP := range segments { + b = append(b, netIP...) // srh.Segments + } + return b, nil +} + +// Helper functions +func SEG6EncapModeString(mode int) string { + switch mode { + case SEG6_IPTUN_MODE_INLINE: + return "inline" + case SEG6_IPTUN_MODE_ENCAP: + return "encap" + } + return "unknown" +} diff --git a/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go new file mode 100644 index 0000000000..1500177267 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/nl/seg6local_linux.go @@ -0,0 +1,76 @@ +package nl + +import () + +// seg6local parameters +const ( + SEG6_LOCAL_UNSPEC = iota + SEG6_LOCAL_ACTION + SEG6_LOCAL_SRH + SEG6_LOCAL_TABLE + SEG6_LOCAL_NH4 + SEG6_LOCAL_NH6 + SEG6_LOCAL_IIF + SEG6_LOCAL_OIF + __SEG6_LOCAL_MAX +) +const ( + SEG6_LOCAL_MAX = __SEG6_LOCAL_MAX +) + +// seg6local actions +const ( + SEG6_LOCAL_ACTION_END = iota + 1 // 1 + SEG6_LOCAL_ACTION_END_X // 2 + SEG6_LOCAL_ACTION_END_T // 3 + SEG6_LOCAL_ACTION_END_DX2 // 4 + SEG6_LOCAL_ACTION_END_DX6 // 5 + SEG6_LOCAL_ACTION_END_DX4 // 6 + SEG6_LOCAL_ACTION_END_DT6 // 7 + SEG6_LOCAL_ACTION_END_DT4 // 8 + SEG6_LOCAL_ACTION_END_B6 // 9 + SEG6_LOCAL_ACTION_END_B6_ENCAPS // 10 + SEG6_LOCAL_ACTION_END_BM // 11 + SEG6_LOCAL_ACTION_END_S // 12 + SEG6_LOCAL_ACTION_END_AS // 13 + SEG6_LOCAL_ACTION_END_AM // 14 + __SEG6_LOCAL_ACTION_MAX +) +const ( + SEG6_LOCAL_ACTION_MAX = __SEG6_LOCAL_ACTION_MAX - 1 +) + +// Helper functions +func SEG6LocalActionString(action int) string { + switch action { + case SEG6_LOCAL_ACTION_END: + return "End" + case SEG6_LOCAL_ACTION_END_X: + return "End.X" + case SEG6_LOCAL_ACTION_END_T: + return "End.T" + case SEG6_LOCAL_ACTION_END_DX2: + return "End.DX2" + case SEG6_LOCAL_ACTION_END_DX6: + return "End.DX6" + case SEG6_LOCAL_ACTION_END_DX4: + return "End.DX4" + case SEG6_LOCAL_ACTION_END_DT6: + return "End.DT6" + case SEG6_LOCAL_ACTION_END_DT4: + return "End.DT4" + case SEG6_LOCAL_ACTION_END_B6: + return "End.B6" + case SEG6_LOCAL_ACTION_END_B6_ENCAPS: + return "End.B6.Encaps" + case SEG6_LOCAL_ACTION_END_BM: + return "End.BM" + case SEG6_LOCAL_ACTION_END_S: + return "End.S" + case SEG6_LOCAL_ACTION_END_AS: + return "End.AS" + case SEG6_LOCAL_ACTION_END_AM: + return "End.AM" + } + return "unknown" +} diff --git a/vendor/github.com/vishvananda/netlink/nl/syscall.go b/vendor/github.com/vishvananda/netlink/nl/syscall.go index 3473e53638..bdf6ba6395 100644 --- a/vendor/github.com/vishvananda/netlink/nl/syscall.go +++ b/vendor/github.com/vishvananda/netlink/nl/syscall.go @@ -1,6 +1,6 @@ package nl -// syscall package lack of rule atributes type. +// syscall package lack of rule attributes type. // Thus there are defined below const ( FRA_UNSPEC = iota @@ -21,6 +21,13 @@ const ( FRA_TABLE /* Extended table id */ FRA_FWMASK /* mask for netfilter mark */ FRA_OIFNAME + FRA_PAD + FRA_L3MDEV /* iif or oif is l3mdev goto its table */ + FRA_UID_RANGE /* UID range */ + FRA_PROTOCOL /* Originator of the rule */ + FRA_IP_PROTO /* ip proto */ + FRA_SPORT_RANGE /* sport */ + FRA_DPORT_RANGE /* dport */ ) // ip rule netlink request types @@ -42,16 +49,6 @@ const ( TCPDIAG_NOCOOKIE = 0xFFFFFFFF /* TCPDIAG_NOCOOKIE in net/ipv4/tcp_diag.h*/ ) -const ( - AF_MPLS = 28 -) - -const ( - RTA_NEWDST = 0x13 - RTA_ENCAP_TYPE = 0x15 - RTA_ENCAP = 0x16 -) - // RTA_ENCAP subtype const ( MPLS_IPTUNNEL_UNSPEC = iota @@ -65,4 +62,15 @@ const ( LWTUNNEL_ENCAP_IP LWTUNNEL_ENCAP_ILA LWTUNNEL_ENCAP_IP6 + LWTUNNEL_ENCAP_SEG6 + LWTUNNEL_ENCAP_BPF + LWTUNNEL_ENCAP_SEG6_LOCAL +) + +// routing header types +const ( + IPV6_SRCRT_STRICT = 0x01 // Deprecated; will be removed + IPV6_SRCRT_TYPE_0 = 0 // Deprecated; will be removed + IPV6_SRCRT_TYPE_2 = 2 // IPv6 type 2 Routing Header + IPV6_SRCRT_TYPE_4 = 4 // Segment Routing with IPv6 ) diff --git a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go index e91fb21c55..eb05ff1cd1 100644 --- a/vendor/github.com/vishvananda/netlink/nl/tc_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/tc_linux.go @@ -1,6 +1,7 @@ package nl import ( + "encoding/binary" "unsafe" ) @@ -64,6 +65,15 @@ const ( TCA_PRIO_MAX = TCA_PRIO_MQ ) +const ( + TCA_STATS_UNSPEC = iota + TCA_STATS_BASIC + TCA_STATS_RATE_EST + TCA_STATS_QUEUE + TCA_STATS_APP + TCA_STATS_MAX = TCA_STATS_APP +) + const ( SizeofTcMsg = 0x14 SizeofTcActionMsg = 0x04 @@ -79,8 +89,15 @@ const ( SizeofTcU32Key = 0x10 SizeofTcU32Sel = 0x10 // without keys SizeofTcGen = 0x14 + SizeofTcConnmark = SizeofTcGen + 0x04 + SizeofTcCsum = SizeofTcGen + 0x04 SizeofTcMirred = SizeofTcGen + 0x08 + SizeofTcTunnelKey = SizeofTcGen + 0x04 + SizeofTcSkbEdit = SizeofTcGen SizeofTcPolice = 2*SizeofTcRateSpec + 0x20 + SizeofTcSfqQopt = 0x0b + SizeofTcSfqRedStats = 0x18 + SizeofTcSfqQoptV1 = SizeofTcSfqQopt + SizeofTcSfqRedStats + 0x1c ) // struct tcmsg { @@ -412,6 +429,57 @@ func (x *TcHtbGlob) Serialize() []byte { return (*(*[SizeofTcHtbGlob]byte)(unsafe.Pointer(x)))[:] } +// HFSC + +type Curve struct { + m1 uint32 + d uint32 + m2 uint32 +} + +type HfscCopt struct { + Rsc Curve + Fsc Curve + Usc Curve +} + +func (c *Curve) Attrs() (uint32, uint32, uint32) { + return c.m1, c.d, c.m2 +} + +func (c *Curve) Set(m1 uint32, d uint32, m2 uint32) { + c.m1 = m1 + c.d = d + c.m2 = m2 +} + +func DeserializeHfscCurve(b []byte) *Curve { + return &Curve{ + m1: binary.LittleEndian.Uint32(b[0:4]), + d: binary.LittleEndian.Uint32(b[4:8]), + m2: binary.LittleEndian.Uint32(b[8:12]), + } +} + +func SerializeHfscCurve(c *Curve) (b []byte) { + t := make([]byte, binary.MaxVarintLen32) + binary.LittleEndian.PutUint32(t, c.m1) + b = append(b, t[:4]...) + binary.LittleEndian.PutUint32(t, c.d) + b = append(b, t[:4]...) + binary.LittleEndian.PutUint32(t, c.m2) + b = append(b, t[:4]...) + return b +} + +type TcHfscOpt struct { + Defcls uint16 +} + +func (x *TcHfscOpt) Serialize() []byte { + return (*(*[2]byte)(unsafe.Pointer(x)))[:] +} + const ( TCA_U32_UNSPEC = iota TCA_U32_CLASSID @@ -586,11 +654,77 @@ const ( TCA_BPF_FD TCA_BPF_NAME TCA_BPF_FLAGS - TCA_BPF_MAX = TCA_BPF_FLAGS + TCA_BPF_FLAGS_GEN + TCA_BPF_TAG + TCA_BPF_ID + TCA_BPF_MAX = TCA_BPF_ID ) type TcBpf TcGen +const ( + TCA_ACT_CONNMARK = 14 +) + +const ( + TCA_CONNMARK_UNSPEC = iota + TCA_CONNMARK_PARMS + TCA_CONNMARK_TM + TCA_CONNMARK_MAX = TCA_CONNMARK_TM +) + +// struct tc_connmark { +// tc_gen; +// __u16 zone; +// }; + +type TcConnmark struct { + TcGen + Zone uint16 +} + +func (msg *TcConnmark) Len() int { + return SizeofTcConnmark +} + +func DeserializeTcConnmark(b []byte) *TcConnmark { + return (*TcConnmark)(unsafe.Pointer(&b[0:SizeofTcConnmark][0])) +} + +func (x *TcConnmark) Serialize() []byte { + return (*(*[SizeofTcConnmark]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_CSUM_UNSPEC = iota + TCA_CSUM_PARMS + TCA_CSUM_TM + TCA_CSUM_PAD + TCA_CSUM_MAX = TCA_CSUM_PAD +) + +// struct tc_csum { +// tc_gen; +// __u32 update_flags; +// } + +type TcCsum struct { + TcGen + UpdateFlags uint32 +} + +func (msg *TcCsum) Len() int { + return SizeofTcCsum +} + +func DeserializeTcCsum(b []byte) *TcCsum { + return (*TcCsum)(unsafe.Pointer(&b[0:SizeofTcCsum][0])) +} + +func (x *TcCsum) Serialize() []byte { + return (*(*[SizeofTcCsum]byte)(unsafe.Pointer(x)))[:] +} + const ( TCA_ACT_MIRRED = 8 ) @@ -626,6 +760,69 @@ func (x *TcMirred) Serialize() []byte { return (*(*[SizeofTcMirred]byte)(unsafe.Pointer(x)))[:] } +const ( + TCA_TUNNEL_KEY_UNSPEC = iota + TCA_TUNNEL_KEY_TM + TCA_TUNNEL_KEY_PARMS + TCA_TUNNEL_KEY_ENC_IPV4_SRC + TCA_TUNNEL_KEY_ENC_IPV4_DST + TCA_TUNNEL_KEY_ENC_IPV6_SRC + TCA_TUNNEL_KEY_ENC_IPV6_DST + TCA_TUNNEL_KEY_ENC_KEY_ID + TCA_TUNNEL_KEY_PAD + TCA_TUNNEL_KEY_ENC_DST_PORT + TCA_TUNNEL_KEY_NO_CSUM + TCA_TUNNEL_KEY_ENC_OPTS + TCA_TUNNEL_KEY_ENC_TOS + TCA_TUNNEL_KEY_ENC_TTL + TCA_TUNNEL_KEY_MAX +) + +type TcTunnelKey struct { + TcGen + Action int32 +} + +func (x *TcTunnelKey) Len() int { + return SizeofTcTunnelKey +} + +func DeserializeTunnelKey(b []byte) *TcTunnelKey { + return (*TcTunnelKey)(unsafe.Pointer(&b[0:SizeofTcTunnelKey][0])) +} + +func (x *TcTunnelKey) Serialize() []byte { + return (*(*[SizeofTcTunnelKey]byte)(unsafe.Pointer(x)))[:] +} + +const ( + TCA_SKBEDIT_UNSPEC = iota + TCA_SKBEDIT_TM + TCA_SKBEDIT_PARMS + TCA_SKBEDIT_PRIORITY + TCA_SKBEDIT_QUEUE_MAPPING + TCA_SKBEDIT_MARK + TCA_SKBEDIT_PAD + TCA_SKBEDIT_PTYPE + TCA_SKBEDIT_MAX = TCA_SKBEDIT_MARK +) + +type TcSkbEdit struct { + TcGen +} + +func (x *TcSkbEdit) Len() int { + return SizeofTcSkbEdit +} + +func DeserializeSkbEdit(b []byte) *TcSkbEdit { + return (*TcSkbEdit)(unsafe.Pointer(&b[0:SizeofTcSkbEdit][0])) +} + +func (x *TcSkbEdit) Serialize() []byte { + return (*(*[SizeofTcSkbEdit]byte)(unsafe.Pointer(x)))[:] +} + // struct tc_police { // __u32 index; // int action; @@ -673,3 +870,250 @@ const ( TCA_FW_MASK TCA_FW_MAX = TCA_FW_MASK ) + +const ( + TCA_MATCHALL_UNSPEC = iota + TCA_MATCHALL_CLASSID + TCA_MATCHALL_ACT + TCA_MATCHALL_FLAGS +) + +const ( + TCA_FQ_UNSPEC = iota + TCA_FQ_PLIMIT // limit of total number of packets in queue + TCA_FQ_FLOW_PLIMIT // limit of packets per flow + TCA_FQ_QUANTUM // RR quantum + TCA_FQ_INITIAL_QUANTUM // RR quantum for new flow + TCA_FQ_RATE_ENABLE // enable/disable rate limiting + TCA_FQ_FLOW_DEFAULT_RATE // obsolete do not use + TCA_FQ_FLOW_MAX_RATE // per flow max rate + TCA_FQ_BUCKETS_LOG // log2(number of buckets) + TCA_FQ_FLOW_REFILL_DELAY // flow credit refill delay in usec + TCA_FQ_ORPHAN_MASK // mask applied to orphaned skb hashes + TCA_FQ_LOW_RATE_THRESHOLD // per packet delay under this rate +) + +const ( + TCA_FQ_CODEL_UNSPEC = iota + TCA_FQ_CODEL_TARGET + TCA_FQ_CODEL_LIMIT + TCA_FQ_CODEL_INTERVAL + TCA_FQ_CODEL_ECN + TCA_FQ_CODEL_FLOWS + TCA_FQ_CODEL_QUANTUM + TCA_FQ_CODEL_CE_THRESHOLD + TCA_FQ_CODEL_DROP_BATCH_SIZE + TCA_FQ_CODEL_MEMORY_LIMIT +) + +const ( + TCA_HFSC_UNSPEC = iota + TCA_HFSC_RSC + TCA_HFSC_FSC + TCA_HFSC_USC +) + +const ( + TCA_FLOWER_UNSPEC = iota + TCA_FLOWER_CLASSID + TCA_FLOWER_INDEV + TCA_FLOWER_ACT + TCA_FLOWER_KEY_ETH_DST /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE /* be16 */ + TCA_FLOWER_KEY_IP_PROTO /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC /* be16 */ + TCA_FLOWER_KEY_TCP_DST /* be16 */ + TCA_FLOWER_KEY_UDP_SRC /* be16 */ + TCA_FLOWER_KEY_UDP_DST /* be16 */ + + TCA_FLOWER_FLAGS + TCA_FLOWER_KEY_VLAN_ID /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK /* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC /* be16 */ + TCA_FLOWER_KEY_SCTP_DST /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK /* be16 */ + + TCA_FLOWER_KEY_FLAGS /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK /* u8 */ + + TCA_FLOWER_KEY_ARP_SIP /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_TIP /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_OP /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK /* u8 */ + TCA_FLOWER_KEY_ARP_SHA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK /* be16 */ + + TCA_FLOWER_KEY_IP_TOS /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_IP_TTL /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS + TCA_FLOWER_KEY_ENC_OPTS_MASK + + __TCA_FLOWER_MAX +) + +// struct tc_sfq_qopt { +// unsigned quantum; /* Bytes per round allocated to flow */ +// int perturb_period; /* Period of hash perturbation */ +// __u32 limit; /* Maximal packets in queue */ +// unsigned divisor; /* Hash divisor */ +// unsigned flows; /* Maximal number of flows */ +// }; + +type TcSfqQopt struct { + Quantum uint8 + Perturb int32 + Limit uint32 + Divisor uint8 + Flows uint8 +} + +func (x *TcSfqQopt) Len() int { + return SizeofTcSfqQopt +} + +func DeserializeTcSfqQopt(b []byte) *TcSfqQopt { + return (*TcSfqQopt)(unsafe.Pointer(&b[0:SizeofTcSfqQopt][0])) +} + +func (x *TcSfqQopt) Serialize() []byte { + return (*(*[SizeofTcSfqQopt]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_sfqred_stats { +// __u32 prob_drop; /* Early drops, below max threshold */ +// __u32 forced_drop; /* Early drops, after max threshold */ +// __u32 prob_mark; /* Marked packets, below max threshold */ +// __u32 forced_mark; /* Marked packets, after max threshold */ +// __u32 prob_mark_head; /* Marked packets, below max threshold */ +// __u32 forced_mark_head;/* Marked packets, after max threshold */ +// }; +type TcSfqRedStats struct { + ProbDrop uint32 + ForcedDrop uint32 + ProbMark uint32 + ForcedMark uint32 + ProbMarkHead uint32 + ForcedMarkHead uint32 +} + +func (x *TcSfqRedStats) Len() int { + return SizeofTcSfqRedStats +} + +func DeserializeTcSfqRedStats(b []byte) *TcSfqRedStats { + return (*TcSfqRedStats)(unsafe.Pointer(&b[0:SizeofTcSfqRedStats][0])) +} + +func (x *TcSfqRedStats) Serialize() []byte { + return (*(*[SizeofTcSfqRedStats]byte)(unsafe.Pointer(x)))[:] +} + +// struct tc_sfq_qopt_v1 { +// struct tc_sfq_qopt v0; +// unsigned int depth; /* max number of packets per flow */ +// unsigned int headdrop; +// /* SFQRED parameters */ +// __u32 limit; /* HARD maximal flow queue length (bytes) */ +// __u32 qth_min; /* Min average length threshold (bytes) */ +// __u32 qth_max; /* Max average length threshold (bytes) */ +// unsigned char Wlog; /* log(W) */ +// unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ +// unsigned char Scell_log; /* cell size for idle damping */ +// unsigned char flags; +// __u32 max_P; /* probability, high resolution */ +// /* SFQRED stats */ +// struct tc_sfqred_stats stats; +// }; +type TcSfqQoptV1 struct { + TcSfqQopt + Depth uint32 + HeadDrop uint32 + Limit uint32 + QthMin uint32 + QthMax uint32 + Wlog byte + Plog byte + ScellLog byte + Flags byte + MaxP uint32 + TcSfqRedStats +} + +func (x *TcSfqQoptV1) Len() int { + return SizeofTcSfqQoptV1 +} + +func DeserializeTcSfqQoptV1(b []byte) *TcSfqQoptV1 { + return (*TcSfqQoptV1)(unsafe.Pointer(&b[0:SizeofTcSfqQoptV1][0])) +} + +func (x *TcSfqQoptV1) Serialize() []byte { + return (*(*[SizeofTcSfqQoptV1]byte)(unsafe.Pointer(x)))[:] +} diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go index 09a2ffa10e..dce9073f7b 100644 --- a/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go @@ -50,34 +50,44 @@ const ( // Attribute types const ( /* Netlink message attributes. */ - XFRMA_UNSPEC = 0x00 - XFRMA_ALG_AUTH = 0x01 /* struct xfrm_algo */ - XFRMA_ALG_CRYPT = 0x02 /* struct xfrm_algo */ - XFRMA_ALG_COMP = 0x03 /* struct xfrm_algo */ - XFRMA_ENCAP = 0x04 /* struct xfrm_algo + struct xfrm_encap_tmpl */ - XFRMA_TMPL = 0x05 /* 1 or more struct xfrm_user_tmpl */ - XFRMA_SA = 0x06 /* struct xfrm_usersa_info */ - XFRMA_POLICY = 0x07 /* struct xfrm_userpolicy_info */ - XFRMA_SEC_CTX = 0x08 /* struct xfrm_sec_ctx */ - XFRMA_LTIME_VAL = 0x09 - XFRMA_REPLAY_VAL = 0x0a - XFRMA_REPLAY_THRESH = 0x0b - XFRMA_ETIMER_THRESH = 0x0c - XFRMA_SRCADDR = 0x0d /* xfrm_address_t */ - XFRMA_COADDR = 0x0e /* xfrm_address_t */ - XFRMA_LASTUSED = 0x0f /* unsigned long */ - XFRMA_POLICY_TYPE = 0x10 /* struct xfrm_userpolicy_type */ - XFRMA_MIGRATE = 0x11 - XFRMA_ALG_AEAD = 0x12 /* struct xfrm_algo_aead */ - XFRMA_KMADDRESS = 0x13 /* struct xfrm_user_kmaddress */ - XFRMA_ALG_AUTH_TRUNC = 0x14 /* struct xfrm_algo_auth */ - XFRMA_MARK = 0x15 /* struct xfrm_mark */ - XFRMA_TFCPAD = 0x16 /* __u32 */ - XFRMA_REPLAY_ESN_VAL = 0x17 /* struct xfrm_replay_esn */ - XFRMA_SA_EXTRA_FLAGS = 0x18 /* __u32 */ - XFRMA_MAX = 0x18 + XFRMA_UNSPEC = iota + XFRMA_ALG_AUTH /* struct xfrm_algo */ + XFRMA_ALG_CRYPT /* struct xfrm_algo */ + XFRMA_ALG_COMP /* struct xfrm_algo */ + XFRMA_ENCAP /* struct xfrm_algo + struct xfrm_encap_tmpl */ + XFRMA_TMPL /* 1 or more struct xfrm_user_tmpl */ + XFRMA_SA /* struct xfrm_usersa_info */ + XFRMA_POLICY /* struct xfrm_userpolicy_info */ + XFRMA_SEC_CTX /* struct xfrm_sec_ctx */ + XFRMA_LTIME_VAL + XFRMA_REPLAY_VAL + XFRMA_REPLAY_THRESH + XFRMA_ETIMER_THRESH + XFRMA_SRCADDR /* xfrm_address_t */ + XFRMA_COADDR /* xfrm_address_t */ + XFRMA_LASTUSED /* unsigned long */ + XFRMA_POLICY_TYPE /* struct xfrm_userpolicy_type */ + XFRMA_MIGRATE + XFRMA_ALG_AEAD /* struct xfrm_algo_aead */ + XFRMA_KMADDRESS /* struct xfrm_user_kmaddress */ + XFRMA_ALG_AUTH_TRUNC /* struct xfrm_algo_auth */ + XFRMA_MARK /* struct xfrm_mark */ + XFRMA_TFCPAD /* __u32 */ + XFRMA_REPLAY_ESN_VAL /* struct xfrm_replay_esn */ + XFRMA_SA_EXTRA_FLAGS /* __u32 */ + XFRMA_PROTO /* __u8 */ + XFRMA_ADDRESS_FILTER /* struct xfrm_address_filter */ + XFRMA_PAD + XFRMA_OFFLOAD_DEV /* struct xfrm_state_offload */ + XFRMA_SET_MARK /* __u32 */ + XFRMA_SET_MARK_MASK /* __u32 */ + XFRMA_IF_ID /* __u32 */ + + XFRMA_MAX = iota - 1 ) +const XFRMA_OUTPUT_MARK = XFRMA_SET_MARK + const ( SizeofXfrmAddress = 0x10 SizeofXfrmSelector = 0x38 diff --git a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go index b6290fd544..43a947f229 100644 --- a/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go +++ b/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go @@ -13,7 +13,7 @@ const ( SizeofXfrmAlgoAuth = 0x48 SizeofXfrmAlgoAEAD = 0x48 SizeofXfrmEncapTmpl = 0x18 - SizeofXfrmUsersaFlush = 0x8 + SizeofXfrmUsersaFlush = 0x1 SizeofXfrmReplayStateEsn = 0x18 ) diff --git a/vendor/github.com/vishvananda/netlink/proc_event_linux.go b/vendor/github.com/vishvananda/netlink/proc_event_linux.go new file mode 100644 index 0000000000..53bc59a6ec --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/proc_event_linux.go @@ -0,0 +1,217 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "os" + "syscall" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +const CN_IDX_PROC = 0x1 + +const ( + PROC_EVENT_NONE = 0x00000000 + PROC_EVENT_FORK = 0x00000001 + PROC_EVENT_EXEC = 0x00000002 + PROC_EVENT_UID = 0x00000004 + PROC_EVENT_GID = 0x00000040 + PROC_EVENT_SID = 0x00000080 + PROC_EVENT_PTRACE = 0x00000100 + PROC_EVENT_COMM = 0x00000200 + PROC_EVENT_COREDUMP = 0x40000000 + PROC_EVENT_EXIT = 0x80000000 +) + +const ( + CN_VAL_PROC = 0x1 + PROC_CN_MCAST_LISTEN = 0x1 +) + +type ProcEventMsg interface { + Pid() uint32 + Tgid() uint32 +} + +type ProcEventHeader struct { + What uint32 + CPU uint32 + Timestamp uint64 +} + +type ProcEvent struct { + ProcEventHeader + Msg ProcEventMsg +} + +func (pe *ProcEvent) setHeader(h ProcEventHeader) { + pe.What = h.What + pe.CPU = h.CPU + pe.Timestamp = h.Timestamp +} + +type ExitProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + ExitCode uint32 + ExitSignal uint32 + ParentPid uint32 + ParentTgid uint32 +} + +type ExitProcEvent2 struct { + ProcessPid uint32 + ProcessTgid uint32 + ExitCode uint32 + ExitSignal uint32 + ParentPid uint32 + ParentTgid uint32 +} + +func (e *ExitProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExitProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ExecProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 +} + +func (e *ExecProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExecProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ForkProcEvent struct { + ParentPid uint32 + ParentTgid uint32 + ChildPid uint32 + ChildTgid uint32 +} + +func (e *ForkProcEvent) Pid() uint32 { + return e.ParentPid +} + +func (e *ForkProcEvent) Tgid() uint32 { + return e.ParentTgid +} + +type CommProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + Comm [16]byte +} + +func (e *CommProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *CommProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +func ProcEventMonitor(ch chan<- ProcEvent, done <-chan struct{}, errorChan chan<- error) error { + h, err := NewHandle() + if err != nil { + return err + } + defer h.Delete() + + s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_CONNECTOR, CN_IDX_PROC) + if err != nil { + return err + } + + var nlmsg nl.NetlinkRequest + + nlmsg.Pid = uint32(os.Getpid()) + nlmsg.Type = unix.NLMSG_DONE + nlmsg.Len = uint32(unix.SizeofNlMsghdr) + + cm := nl.NewCnMsg(CN_IDX_PROC, CN_VAL_PROC, PROC_CN_MCAST_LISTEN) + nlmsg.AddData(cm) + + s.Send(&nlmsg) + + if done != nil { + go func() { + <-done + s.Close() + }() + } + + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + errorChan <- err + return + } + if from.Pid != nl.PidKernel { + errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return + } + + for _, m := range msgs { + e := parseNetlinkMessage(m) + if e != nil { + ch <- *e + } + } + + } + }() + + return nil +} + +func parseNetlinkMessage(m syscall.NetlinkMessage) *ProcEvent { + if m.Header.Type == unix.NLMSG_DONE { + buf := bytes.NewBuffer(m.Data) + msg := &nl.CnMsg{} + hdr := &ProcEventHeader{} + binary.Read(buf, nl.NativeEndian(), msg) + binary.Read(buf, nl.NativeEndian(), hdr) + + pe := &ProcEvent{} + pe.setHeader(*hdr) + switch hdr.What { + case PROC_EVENT_EXIT: + event := &ExitProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_FORK: + event := &ForkProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_EXEC: + event := &ExecProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_COMM: + event := &CommProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + } + return nil + } + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/protinfo.go b/vendor/github.com/vishvananda/netlink/protinfo.go index 0087c4438b..60b23b3742 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo.go +++ b/vendor/github.com/vishvananda/netlink/protinfo.go @@ -18,6 +18,10 @@ type Protinfo struct { // String returns a list of enabled flags func (prot *Protinfo) String() string { + if prot == nil { + return "" + } + var boolStrings []string if prot.Hairpin { boolStrings = append(boolStrings, "Hairpin") diff --git a/vendor/github.com/vishvananda/netlink/protinfo_linux.go b/vendor/github.com/vishvananda/netlink/protinfo_linux.go index 10dd0d5335..15b65123ce 100644 --- a/vendor/github.com/vishvananda/netlink/protinfo_linux.go +++ b/vendor/github.com/vishvananda/netlink/protinfo_linux.go @@ -5,6 +5,7 @@ import ( "syscall" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) func LinkGetProtinfo(link Link) (Protinfo, error) { @@ -15,10 +16,10 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) { base := link.Attrs() h.ensureIndex(base) var pi Protinfo - req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP) - msg := nl.NewIfInfomsg(syscall.AF_BRIDGE) + req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP) + msg := nl.NewIfInfomsg(unix.AF_BRIDGE) req.AddData(msg) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, 0) + msgs, err := req.Execute(unix.NETLINK_ROUTE, 0) if err != nil { return pi, err } @@ -33,14 +34,14 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) { return pi, err } for _, attr := range attrs { - if attr.Attr.Type != syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED { + if attr.Attr.Type != unix.IFLA_PROTINFO|unix.NLA_F_NESTED { continue } infos, err := nl.ParseRouteAttr(attr.Value) if err != nil { return pi, err } - pi = *parseProtinfo(infos) + pi = parseProtinfo(infos) return pi, nil } @@ -48,8 +49,7 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) { return pi, fmt.Errorf("Device with index %d not found", base.Index) } -func parseProtinfo(infos []syscall.NetlinkRouteAttr) *Protinfo { - var pi Protinfo +func parseProtinfo(infos []syscall.NetlinkRouteAttr) (pi Protinfo) { for _, info := range infos { switch info.Attr.Type { case nl.IFLA_BRPORT_MODE: @@ -70,5 +70,5 @@ func parseProtinfo(infos []syscall.NetlinkRouteAttr) *Protinfo { pi.ProxyArpWiFi = byteToBool(info.Value[0]) } } - return &pi + return } diff --git a/vendor/github.com/vishvananda/netlink/qdisc.go b/vendor/github.com/vishvananda/netlink/qdisc.go index 0ca86ebe89..f594c9c212 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc.go +++ b/vendor/github.com/vishvananda/netlink/qdisc.go @@ -176,6 +176,13 @@ type Netem struct { CorruptCorr uint32 } +func (netem *Netem) String() string { + return fmt.Sprintf( + "{Latency: %v, Limit: %v, Loss: %v, Gap: %v, Duplicate: %v, Jitter: %v}", + netem.Latency, netem.Limit, netem.Loss, netem.Gap, netem.Duplicate, netem.Jitter, + ) +} + func (qdisc *Netem) Attrs() *QdiscAttrs { return &qdisc.QdiscAttrs } @@ -230,3 +237,130 @@ func (qdisc *GenericQdisc) Attrs() *QdiscAttrs { func (qdisc *GenericQdisc) Type() string { return qdisc.QdiscType } + +type Hfsc struct { + QdiscAttrs + Defcls uint16 +} + +func NewHfsc(attrs QdiscAttrs) *Hfsc { + return &Hfsc{ + QdiscAttrs: attrs, + Defcls: 1, + } +} + +func (hfsc *Hfsc) Attrs() *QdiscAttrs { + return &hfsc.QdiscAttrs +} + +func (hfsc *Hfsc) Type() string { + return "hfsc" +} + +func (hfsc *Hfsc) String() string { + return fmt.Sprintf( + "{%v -- default: %d}", + hfsc.Attrs(), hfsc.Defcls, + ) +} + +// Fq is a classless packet scheduler meant to be mostly used for locally generated traffic. +type Fq struct { + QdiscAttrs + PacketLimit uint32 + FlowPacketLimit uint32 + // In bytes + Quantum uint32 + InitialQuantum uint32 + // called RateEnable under the hood + Pacing uint32 + FlowDefaultRate uint32 + FlowMaxRate uint32 + // called BucketsLog under the hood + Buckets uint32 + FlowRefillDelay uint32 + LowRateThreshold uint32 +} + +func (fq *Fq) String() string { + return fmt.Sprintf( + "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v}", + fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, + ) +} + +func NewFq(attrs QdiscAttrs) *Fq { + return &Fq{ + QdiscAttrs: attrs, + Pacing: 1, + } +} + +func (qdisc *Fq) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Fq) Type() string { + return "fq" +} + +// FQ_Codel (Fair Queuing Controlled Delay) is queuing discipline that combines Fair Queuing with the CoDel AQM scheme. +type FqCodel struct { + QdiscAttrs + Target uint32 + Limit uint32 + Interval uint32 + ECN uint32 + Flows uint32 + Quantum uint32 + CEThreshold uint32 + DropBatchSize uint32 + MemoryLimit uint32 +} + +func (fqcodel *FqCodel) String() string { + return fmt.Sprintf( + "{%v -- Target: %v, Limit: %v, Interval: %v, ECM: %v, Flows: %v, Quantum: %v}", + fqcodel.Attrs(), fqcodel.Target, fqcodel.Limit, fqcodel.Interval, fqcodel.ECN, fqcodel.Flows, fqcodel.Quantum, + ) +} + +func NewFqCodel(attrs QdiscAttrs) *FqCodel { + return &FqCodel{ + QdiscAttrs: attrs, + ECN: 1, + } +} + +func (qdisc *FqCodel) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *FqCodel) Type() string { + return "fq_codel" +} + +type Sfq struct { + QdiscAttrs + // TODO: Only the simplified options for SFQ are handled here. Support for the extended one can be added later. + Quantum uint8 + Perturb uint8 + Limit uint32 + Divisor uint8 +} + +func (sfq *Sfq) String() string { + return fmt.Sprintf( + "{%v -- Quantum: %v, Perturb: %v, Limit: %v, Divisor: %v}", + sfq.Attrs(), sfq.Quantum, sfq.Perturb, sfq.Limit, sfq.Divisor, + ) +} + +func (qdisc *Sfq) Attrs() *QdiscAttrs { + return &qdisc.QdiscAttrs +} + +func (qdisc *Sfq) Type() string { + return "sfq" +} diff --git a/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/github.com/vishvananda/netlink/qdisc_linux.go index 2c0deddb32..e182e1cfe6 100644 --- a/vendor/github.com/vishvananda/netlink/qdisc_linux.go +++ b/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -8,6 +8,7 @@ import ( "syscall" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) // NOTE function is here because it uses other linux functions @@ -84,7 +85,7 @@ func QdiscDel(qdisc Qdisc) error { // QdiscDel will delete a qdisc from the system. // Equivalent to: `tc qdisc del $qdisc` func (h *Handle) QdiscDel(qdisc Qdisc) error { - return h.qdiscModify(syscall.RTM_DELQDISC, 0, qdisc) + return h.qdiscModify(unix.RTM_DELQDISC, 0, qdisc) } // QdiscChange will change a qdisc in place @@ -98,7 +99,7 @@ func QdiscChange(qdisc Qdisc) error { // Equivalent to: `tc qdisc change $qdisc` // The parent and handle MUST NOT be changed. func (h *Handle) QdiscChange(qdisc Qdisc) error { - return h.qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc) + return h.qdiscModify(unix.RTM_NEWQDISC, 0, qdisc) } // QdiscReplace will replace a qdisc to the system. @@ -113,8 +114,8 @@ func QdiscReplace(qdisc Qdisc) error { // The handle MUST change. func (h *Handle) QdiscReplace(qdisc Qdisc) error { return h.qdiscModify( - syscall.RTM_NEWQDISC, - syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE, + unix.RTM_NEWQDISC, + unix.NLM_F_CREATE|unix.NLM_F_REPLACE, qdisc) } @@ -128,13 +129,13 @@ func QdiscAdd(qdisc Qdisc) error { // Equivalent to: `tc qdisc add $qdisc` func (h *Handle) QdiscAdd(qdisc Qdisc) error { return h.qdiscModify( - syscall.RTM_NEWQDISC, - syscall.NLM_F_CREATE|syscall.NLM_F_EXCL, + unix.RTM_NEWQDISC, + unix.NLM_F_CREATE|unix.NLM_F_EXCL, qdisc) } func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error { - req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK) base := qdisc.Attrs() msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, @@ -145,13 +146,13 @@ func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error { req.AddData(msg) // When deleting don't bother building the rest of the netlink payload - if cmd != syscall.RTM_DELQDISC { + if cmd != unix.RTM_DELQDISC { if err := qdiscPayload(req, qdisc); err != nil { return err } } - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -160,78 +161,146 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) - if prio, ok := qdisc.(*Prio); ok { + + switch qdisc := qdisc.(type) { + case *Prio: tcmap := nl.TcPrioMap{ - Bands: int32(prio.Bands), - Priomap: prio.PriorityMap, + Bands: int32(qdisc.Bands), + Priomap: qdisc.PriorityMap, } options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize()) - } else if tbf, ok := qdisc.(*Tbf); ok { + case *Tbf: opt := nl.TcTbfQopt{} - opt.Rate.Rate = uint32(tbf.Rate) - opt.Peakrate.Rate = uint32(tbf.Peakrate) - opt.Limit = tbf.Limit - opt.Buffer = tbf.Buffer - nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize()) - if tbf.Rate >= uint64(1<<32) { - nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(tbf.Rate)) + opt.Rate.Rate = uint32(qdisc.Rate) + opt.Peakrate.Rate = uint32(qdisc.Peakrate) + opt.Limit = qdisc.Limit + opt.Buffer = qdisc.Buffer + options.AddRtAttr(nl.TCA_TBF_PARMS, opt.Serialize()) + if qdisc.Rate >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_TBF_RATE64, nl.Uint64Attr(qdisc.Rate)) } - if tbf.Peakrate >= uint64(1<<32) { - nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(tbf.Peakrate)) + if qdisc.Peakrate >= uint64(1<<32) { + options.AddRtAttr(nl.TCA_TBF_PRATE64, nl.Uint64Attr(qdisc.Peakrate)) } - if tbf.Peakrate > 0 { - nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(tbf.Minburst)) + if qdisc.Peakrate > 0 { + options.AddRtAttr(nl.TCA_TBF_PBURST, nl.Uint32Attr(qdisc.Minburst)) } - } else if htb, ok := qdisc.(*Htb); ok { + case *Htb: opt := nl.TcHtbGlob{} - opt.Version = htb.Version - opt.Rate2Quantum = htb.Rate2Quantum - opt.Defcls = htb.Defcls + opt.Version = qdisc.Version + opt.Rate2Quantum = qdisc.Rate2Quantum + opt.Defcls = qdisc.Defcls // TODO: Handle Debug properly. For now default to 0 - opt.Debug = htb.Debug - opt.DirectPkts = htb.DirectPkts - nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize()) - // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) - } else if netem, ok := qdisc.(*Netem); ok { + opt.Debug = qdisc.Debug + opt.DirectPkts = qdisc.DirectPkts + options.AddRtAttr(nl.TCA_HTB_INIT, opt.Serialize()) + // options.AddRtAttr(nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) + case *Hfsc: + opt := nl.TcHfscOpt{} + opt.Defcls = qdisc.Defcls + options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) + case *Netem: opt := nl.TcNetemQopt{} - opt.Latency = netem.Latency - opt.Limit = netem.Limit - opt.Loss = netem.Loss - opt.Gap = netem.Gap - opt.Duplicate = netem.Duplicate - opt.Jitter = netem.Jitter + opt.Latency = qdisc.Latency + opt.Limit = qdisc.Limit + opt.Loss = qdisc.Loss + opt.Gap = qdisc.Gap + opt.Duplicate = qdisc.Duplicate + opt.Jitter = qdisc.Jitter options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) // Correlation corr := nl.TcNetemCorr{} - corr.DelayCorr = netem.DelayCorr - corr.LossCorr = netem.LossCorr - corr.DupCorr = netem.DuplicateCorr + corr.DelayCorr = qdisc.DelayCorr + corr.LossCorr = qdisc.LossCorr + corr.DupCorr = qdisc.DuplicateCorr if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 { - nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize()) + options.AddRtAttr(nl.TCA_NETEM_CORR, corr.Serialize()) } // Corruption corruption := nl.TcNetemCorrupt{} - corruption.Probability = netem.CorruptProb - corruption.Correlation = netem.CorruptCorr + corruption.Probability = qdisc.CorruptProb + corruption.Correlation = qdisc.CorruptCorr if corruption.Probability > 0 { - nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize()) + options.AddRtAttr(nl.TCA_NETEM_CORRUPT, corruption.Serialize()) } // Reorder reorder := nl.TcNetemReorder{} - reorder.Probability = netem.ReorderProb - reorder.Correlation = netem.ReorderCorr + reorder.Probability = qdisc.ReorderProb + reorder.Correlation = qdisc.ReorderCorr if reorder.Probability > 0 { - nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize()) + options.AddRtAttr(nl.TCA_NETEM_REORDER, reorder.Serialize()) } - } else if _, ok := qdisc.(*Ingress); ok { + case *Ingress: // ingress filters must use the proper handle if qdisc.Attrs().Parent != HANDLE_INGRESS { return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS") } + case *FqCodel: + options.AddRtAttr(nl.TCA_FQ_CODEL_ECN, nl.Uint32Attr((uint32(qdisc.ECN)))) + if qdisc.Limit > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_LIMIT, nl.Uint32Attr((uint32(qdisc.Limit)))) + } + if qdisc.Interval > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_INTERVAL, nl.Uint32Attr((uint32(qdisc.Interval)))) + } + if qdisc.Flows > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_FLOWS, nl.Uint32Attr((uint32(qdisc.Flows)))) + } + if qdisc.Quantum > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum)))) + } + if qdisc.CEThreshold > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_CE_THRESHOLD, nl.Uint32Attr(qdisc.CEThreshold)) + } + if qdisc.DropBatchSize > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_DROP_BATCH_SIZE, nl.Uint32Attr(qdisc.DropBatchSize)) + } + if qdisc.MemoryLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_CODEL_MEMORY_LIMIT, nl.Uint32Attr(qdisc.MemoryLimit)) + } + case *Fq: + options.AddRtAttr(nl.TCA_FQ_RATE_ENABLE, nl.Uint32Attr((uint32(qdisc.Pacing)))) + + if qdisc.Buckets > 0 { + options.AddRtAttr(nl.TCA_FQ_BUCKETS_LOG, nl.Uint32Attr((uint32(qdisc.Buckets)))) + } + if qdisc.LowRateThreshold > 0 { + options.AddRtAttr(nl.TCA_FQ_LOW_RATE_THRESHOLD, nl.Uint32Attr((uint32(qdisc.LowRateThreshold)))) + } + if qdisc.Quantum > 0 { + options.AddRtAttr(nl.TCA_FQ_QUANTUM, nl.Uint32Attr((uint32(qdisc.Quantum)))) + } + if qdisc.InitialQuantum > 0 { + options.AddRtAttr(nl.TCA_FQ_INITIAL_QUANTUM, nl.Uint32Attr((uint32(qdisc.InitialQuantum)))) + } + if qdisc.FlowRefillDelay > 0 { + options.AddRtAttr(nl.TCA_FQ_FLOW_REFILL_DELAY, nl.Uint32Attr((uint32(qdisc.FlowRefillDelay)))) + } + if qdisc.FlowPacketLimit > 0 { + options.AddRtAttr(nl.TCA_FQ_FLOW_PLIMIT, nl.Uint32Attr((uint32(qdisc.FlowPacketLimit)))) + } + if qdisc.FlowMaxRate > 0 { + options.AddRtAttr(nl.TCA_FQ_FLOW_MAX_RATE, nl.Uint32Attr((uint32(qdisc.FlowMaxRate)))) + } + if qdisc.FlowDefaultRate > 0 { + options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate)))) + } + case *Sfq: + opt := nl.TcSfqQoptV1{} + opt.TcSfqQopt.Quantum = qdisc.Quantum + opt.TcSfqQopt.Perturb = int32(qdisc.Perturb) + opt.TcSfqQopt.Limit = qdisc.Limit + opt.TcSfqQopt.Divisor = qdisc.Divisor + + options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) + default: + options = nil } - req.AddData(options) + if options != nil { + req.AddData(options) + } return nil } @@ -246,7 +315,7 @@ func QdiscList(link Link) ([]Qdisc, error) { // Equivalent to: `tc qdisc show`. // The list can be filtered by link. func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { - req := h.newNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP) + req := h.newNetlinkRequest(unix.RTM_GETQDISC, unix.NLM_F_DUMP) index := int32(0) if link != nil { base := link.Attrs() @@ -259,7 +328,7 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { } req.AddData(msg) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWQDISC) if err != nil { return nil, err } @@ -301,8 +370,16 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { qdisc = &Ingress{} case "htb": qdisc = &Htb{} + case "fq": + qdisc = &Fq{} + case "hfsc": + qdisc = &Hfsc{} + case "fq_codel": + qdisc = &FqCodel{} case "netem": qdisc = &Netem{} + case "sfq": + qdisc = &Sfq{} default: qdisc = &GenericQdisc{QdiscType: qdiscType} } @@ -326,6 +403,10 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { if err := parseTbfData(qdisc, data); err != nil { return nil, err } + case "hfsc": + if err := parseHfscData(qdisc, attr.Value); err != nil { + return nil, err + } case "htb": data, err := nl.ParseRouteAttr(attr.Value) if err != nil { @@ -334,10 +415,30 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { if err := parseHtbData(qdisc, data); err != nil { return nil, err } + case "fq": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseFqData(qdisc, data); err != nil { + return nil, err + } + case "fq_codel": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseFqCodelData(qdisc, data); err != nil { + return nil, err + } case "netem": if err := parseNetemData(qdisc, attr.Value); err != nil { return nil, err } + case "sfq": + if err := parseSfqData(qdisc, attr.Value); err != nil { + return nil, err + } // no options for ingress } @@ -367,7 +468,6 @@ func parsePrioData(qdisc Qdisc, value []byte) error { } func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() htb := qdisc.(*Htb) for _, datum := range data { switch datum.Attr.Type { @@ -386,6 +486,71 @@ func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { return nil } +func parseFqCodelData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + fqCodel := qdisc.(*FqCodel) + for _, datum := range data { + + switch datum.Attr.Type { + case nl.TCA_FQ_CODEL_TARGET: + fqCodel.Target = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_LIMIT: + fqCodel.Limit = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_INTERVAL: + fqCodel.Interval = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_ECN: + fqCodel.ECN = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_FLOWS: + fqCodel.Flows = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_QUANTUM: + fqCodel.Quantum = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_CE_THRESHOLD: + fqCodel.CEThreshold = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_DROP_BATCH_SIZE: + fqCodel.DropBatchSize = native.Uint32(datum.Value) + case nl.TCA_FQ_CODEL_MEMORY_LIMIT: + fqCodel.MemoryLimit = native.Uint32(datum.Value) + } + } + return nil +} + +func parseHfscData(qdisc Qdisc, data []byte) error { + Hfsc := qdisc.(*Hfsc) + Hfsc.Defcls = native.Uint16(data) + return nil +} + +func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + fq := qdisc.(*Fq) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FQ_BUCKETS_LOG: + fq.Buckets = native.Uint32(datum.Value) + case nl.TCA_FQ_LOW_RATE_THRESHOLD: + fq.LowRateThreshold = native.Uint32(datum.Value) + case nl.TCA_FQ_QUANTUM: + fq.Quantum = native.Uint32(datum.Value) + case nl.TCA_FQ_RATE_ENABLE: + fq.Pacing = native.Uint32(datum.Value) + case nl.TCA_FQ_INITIAL_QUANTUM: + fq.InitialQuantum = native.Uint32(datum.Value) + case nl.TCA_FQ_ORPHAN_MASK: + // TODO + case nl.TCA_FQ_FLOW_REFILL_DELAY: + fq.FlowRefillDelay = native.Uint32(datum.Value) + case nl.TCA_FQ_FLOW_PLIMIT: + fq.FlowPacketLimit = native.Uint32(datum.Value) + case nl.TCA_FQ_PLIMIT: + fq.PacketLimit = native.Uint32(datum.Value) + case nl.TCA_FQ_FLOW_MAX_RATE: + fq.FlowMaxRate = native.Uint32(datum.Value) + case nl.TCA_FQ_FLOW_DEFAULT_RATE: + fq.FlowDefaultRate = native.Uint32(datum.Value) + } + } + return nil +} + func parseNetemData(qdisc Qdisc, value []byte) error { netem := qdisc.(*Netem) opt := nl.DeserializeTcNetemQopt(value) @@ -420,7 +585,6 @@ func parseNetemData(qdisc Qdisc, value []byte) error { } func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { - native = nl.NativeEndian() tbf := qdisc.(*Tbf) for _, datum := range data { switch datum.Attr.Type { @@ -441,6 +605,17 @@ func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { return nil } +func parseSfqData(qdisc Qdisc, value []byte) error { + sfq := qdisc.(*Sfq) + opt := nl.DeserializeTcSfqQoptV1(value) + sfq.Quantum = opt.TcSfqQopt.Quantum + sfq.Perturb = uint8(opt.TcSfqQopt.Perturb) + sfq.Limit = opt.TcSfqQopt.Limit + sfq.Divisor = opt.TcSfqQopt.Divisor + + return nil +} + const ( TIME_UNITS_PER_SEC = 1000000 ) @@ -457,10 +632,10 @@ func initClock() { return } parts := strings.Split(strings.TrimSpace(string(data)), " ") - if len(parts) < 3 { + if len(parts) < 4 { return } - var vals [3]uint64 + var vals [4]uint64 for i := range vals { val, err := strconv.ParseUint(parts[i], 16, 32) if err != nil { @@ -474,7 +649,12 @@ func initClock() { } clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor - hz = float64(vals[0]) + if vals[2] == 1000000 { + // ref https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/lib/utils.c#n963 + hz = float64(vals[3]) + } else { + hz = 100 + } } func TickInUsec() float64 { @@ -522,6 +702,11 @@ func latency(rate uint64, limit, buffer uint32) float64 { return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer)) } -func Xmittime(rate uint64, size uint32) float64 { - return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate)) +func Xmittime(rate uint64, size uint32) uint32 { + // https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/tc/tc_core.c#n62 + return time2Tick(uint32(TIME_UNITS_PER_SEC * (float64(size) / float64(rate)))) +} + +func Xmitsize(rate uint64, ticks uint32) uint32 { + return uint32((float64(rate) * float64(tick2Time(ticks))) / TIME_UNITS_PER_SEC) } diff --git a/vendor/github.com/vishvananda/netlink/rdma_link_linux.go b/vendor/github.com/vishvananda/netlink/rdma_link_linux.go new file mode 100644 index 0000000000..036399db6b --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/rdma_link_linux.go @@ -0,0 +1,331 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "net" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// LinkAttrs represents data shared by most link types +type RdmaLinkAttrs struct { + Index uint32 + Name string + FirmwareVersion string + NodeGuid string + SysImageGuid string +} + +// Link represents a rdma device from netlink. +type RdmaLink struct { + Attrs RdmaLinkAttrs +} + +func getProtoField(clientType int, op int) int { + return ((clientType << nl.RDMA_NL_GET_CLIENT_SHIFT) | op) +} + +func uint64ToGuidString(guid uint64) string { + //Convert to byte array + sysGuidBytes := new(bytes.Buffer) + binary.Write(sysGuidBytes, binary.LittleEndian, guid) + + //Convert to HardwareAddr + sysGuidNet := net.HardwareAddr(sysGuidBytes.Bytes()) + + //Get the String + return sysGuidNet.String() +} + +func executeOneGetRdmaLink(data []byte) (*RdmaLink, error) { + + link := RdmaLink{} + + reader := bytes.NewReader(data) + for reader.Len() >= 4 { + _, attrType, len, value := parseNfAttrTLV(reader) + + switch attrType { + case nl.RDMA_NLDEV_ATTR_DEV_INDEX: + var Index uint32 + r := bytes.NewReader(value) + binary.Read(r, nl.NativeEndian(), &Index) + link.Attrs.Index = Index + case nl.RDMA_NLDEV_ATTR_DEV_NAME: + link.Attrs.Name = string(value[0 : len-1]) + case nl.RDMA_NLDEV_ATTR_FW_VERSION: + link.Attrs.FirmwareVersion = string(value[0 : len-1]) + case nl.RDMA_NLDEV_ATTR_NODE_GUID: + var guid uint64 + r := bytes.NewReader(value) + binary.Read(r, nl.NativeEndian(), &guid) + link.Attrs.NodeGuid = uint64ToGuidString(guid) + case nl.RDMA_NLDEV_ATTR_SYS_IMAGE_GUID: + var sysGuid uint64 + r := bytes.NewReader(value) + binary.Read(r, nl.NativeEndian(), &sysGuid) + link.Attrs.SysImageGuid = uint64ToGuidString(sysGuid) + } + if (len % 4) != 0 { + // Skip pad bytes + reader.Seek(int64(4-(len%4)), seekCurrent) + } + } + return &link, nil +} + +func execRdmaSetLink(req *nl.NetlinkRequest) error { + + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkList gets a list of RDMA link devices. +// Equivalent to: `rdma dev show` +func RdmaLinkList() ([]*RdmaLink, error) { + return pkgHandle.RdmaLinkList() +} + +// RdmaLinkList gets a list of RDMA link devices. +// Equivalent to: `rdma dev show` +func (h *Handle) RdmaLinkList() ([]*RdmaLink, error) { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_GET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK|unix.NLM_F_DUMP) + + msgs, err := req.Execute(unix.NETLINK_RDMA, 0) + if err != nil { + return nil, err + } + + var res []*RdmaLink + for _, m := range msgs { + link, err := executeOneGetRdmaLink(m) + if err != nil { + return nil, err + } + res = append(res, link) + } + + return res, nil +} + +// RdmaLinkByName finds a link by name and returns a pointer to the object if +// found and nil error, otherwise returns error code. +func RdmaLinkByName(name string) (*RdmaLink, error) { + return pkgHandle.RdmaLinkByName(name) +} + +// RdmaLinkByName finds a link by name and returns a pointer to the object if +// found and nil error, otherwise returns error code. +func (h *Handle) RdmaLinkByName(name string) (*RdmaLink, error) { + links, err := h.RdmaLinkList() + if err != nil { + return nil, err + } + for _, link := range links { + if link.Attrs.Name == name { + return link, nil + } + } + return nil, fmt.Errorf("Rdma device %v not found", name) +} + +// RdmaLinkSetName sets the name of the rdma link device. Return nil on success +// or error otherwise. +// Equivalent to: `rdma dev set $old_devname name $name` +func RdmaLinkSetName(link *RdmaLink, name string) error { + return pkgHandle.RdmaLinkSetName(link, name) +} + +// RdmaLinkSetName sets the name of the rdma link device. Return nil on success +// or error otherwise. +// Equivalent to: `rdma dev set $old_devname name $name` +func (h *Handle) RdmaLinkSetName(link *RdmaLink, name string) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs.Index)) + data := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b) + req.AddData(data) + + b = make([]byte, len(name)+1) + copy(b, name) + data = nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, b) + req.AddData(data) + + return execRdmaSetLink(req) +} + +func netnsModeToString(mode uint8) string { + switch mode { + case 0: + return "exclusive" + case 1: + return "shared" + default: + return "unknown" + } +} + +func executeOneGetRdmaNetnsMode(data []byte) (string, error) { + reader := bytes.NewReader(data) + for reader.Len() >= 4 { + _, attrType, len, value := parseNfAttrTLV(reader) + + switch attrType { + case nl.RDMA_NLDEV_SYS_ATTR_NETNS_MODE: + var mode uint8 + r := bytes.NewReader(value) + binary.Read(r, nl.NativeEndian(), &mode) + return netnsModeToString(mode), nil + } + if (len % 4) != 0 { + // Skip pad bytes + reader.Seek(int64(4-(len%4)), seekCurrent) + } + } + return "", fmt.Errorf("Invalid netns mode") +} + +// RdmaSystemGetNetnsMode gets the net namespace mode for RDMA subsystem +// Returns mode string and error status as nil on success or returns error +// otherwise. +// Equivalent to: `rdma system show netns' +func RdmaSystemGetNetnsMode() (string, error) { + return pkgHandle.RdmaSystemGetNetnsMode() +} + +// RdmaSystemGetNetnsMode gets the net namespace mode for RDMA subsystem +// Returns mode string and error status as nil on success or returns error +// otherwise. +// Equivalent to: `rdma system show netns' +func (h *Handle) RdmaSystemGetNetnsMode() (string, error) { + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SYS_GET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + msgs, err := req.Execute(unix.NETLINK_RDMA, 0) + if err != nil { + return "", err + } + if len(msgs) == 0 { + return "", fmt.Errorf("No valid response from kernel") + } + return executeOneGetRdmaNetnsMode(msgs[0]) +} + +func netnsModeStringToUint8(mode string) (uint8, error) { + switch mode { + case "exclusive": + return 0, nil + case "shared": + return 1, nil + default: + return 0, fmt.Errorf("Invalid mode; %q", mode) + } +} + +// RdmaSystemSetNetnsMode sets the net namespace mode for RDMA subsystem +// Returns nil on success or appropriate error code. +// Equivalent to: `rdma system set netns { shared | exclusive }' +func RdmaSystemSetNetnsMode(NewMode string) error { + return pkgHandle.RdmaSystemSetNetnsMode(NewMode) +} + +// RdmaSystemSetNetnsMode sets the net namespace mode for RDMA subsystem +// Returns nil on success or appropriate error code. +// Equivalent to: `rdma system set netns { shared | exclusive }' +func (h *Handle) RdmaSystemSetNetnsMode(NewMode string) error { + value, err := netnsModeStringToUint8(NewMode) + if err != nil { + return err + } + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SYS_SET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + data := nl.NewRtAttr(nl.RDMA_NLDEV_SYS_ATTR_NETNS_MODE, []byte{value}) + req.AddData(data) + + _, err = req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkSetNsFd puts the RDMA device into a new network namespace. The +// fd must be an open file descriptor to a network namespace. +// Similar to: `rdma dev set $dev netns $ns` +func RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error { + return pkgHandle.RdmaLinkSetNsFd(link, fd) +} + +// RdmaLinkSetNsFd puts the RDMA device into a new network namespace. The +// fd must be an open file descriptor to a network namespace. +// Similar to: `rdma dev set $dev netns $ns` +func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_SET) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + data := nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, + nl.Uint32Attr(link.Attrs.Index)) + req.AddData(data) + + data = nl.NewRtAttr(nl.RDMA_NLDEV_NET_NS_FD, nl.Uint32Attr(fd)) + req.AddData(data) + + return execRdmaSetLink(req) +} + +// RdmaLinkDel deletes an rdma link +// +// Similar to: rdma link delete NAME +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkDel(name string) error { + return pkgHandle.RdmaLinkDel(name) +} + +// RdmaLinkDel deletes an rdma link. +func (h *Handle) RdmaLinkDel(name string) error { + link, err := h.RdmaLinkByName(name) + if err != nil { + return err + } + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_DELLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + b := make([]byte, 4) + native.PutUint32(b, link.Attrs.Index) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b)) + + _, err = req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +// Similar to: rdma link add NAME type TYPE netdev NETDEV +// NAME - specifies the new name of the rdma link to add +// TYPE - specifies which rdma type to use. Link types: +// rxe - Soft RoCE driver +// siw - Soft iWARP driver +// NETDEV - specifies the network device to which the link is bound +// +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkAdd(linkName, linkType, netdev string) error { + return pkgHandle.RdmaLinkAdd(linkName, linkType, netdev) +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_NEWLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, nl.ZeroTerminated(linkName))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_LINK_TYPE, nl.ZeroTerminated(linkType))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_NDEV_NAME, nl.ZeroTerminated(netdev))) + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} diff --git a/vendor/github.com/vishvananda/netlink/route.go b/vendor/github.com/vishvananda/netlink/route.go index 03ac4b2391..79cc218ec8 100644 --- a/vendor/github.com/vishvananda/netlink/route.go +++ b/vendor/github.com/vishvananda/netlink/route.go @@ -11,11 +11,30 @@ type Scope uint8 type NextHopFlag int +const ( + RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) + RT_FILTER_SCOPE + RT_FILTER_TYPE + RT_FILTER_TOS + RT_FILTER_IIF + RT_FILTER_OIF + RT_FILTER_DST + RT_FILTER_SRC + RT_FILTER_GW + RT_FILTER_TABLE + RT_FILTER_HOPLIMIT + RT_FILTER_PRIORITY + RT_FILTER_MARK + RT_FILTER_MASK + RT_FILTER_REALM +) + type Destination interface { Family() int Decode([]byte) error Encode() ([]byte, error) String() string + Equal(Destination) bool } type Encap interface { @@ -23,26 +42,49 @@ type Encap interface { Decode([]byte) error Encode() ([]byte, error) String() string + Equal(Encap) bool } +//Protocol describe what was the originator of the route +type RouteProtocol int + // Route represents a netlink route. type Route struct { - LinkIndex int - ILinkIndex int - Scope Scope - Dst *net.IPNet - Src net.IP - Gw net.IP - MultiPath []*NexthopInfo - Protocol int - Priority int - Table int - Type int - Tos int - Flags int - MPLSDst *int - NewDst Destination - Encap Encap + LinkIndex int + ILinkIndex int + Scope Scope + Dst *net.IPNet + Src net.IP + Gw net.IP + MultiPath []*NexthopInfo + Protocol RouteProtocol + Priority int + Family int + Table int + Type int + Tos int + Flags int + MPLSDst *int + NewDst Destination + Encap Encap + Via Destination + Realm int + MTU int + Window int + Rtt int + RttVar int + Ssthresh int + Cwnd int + AdvMSS int + Reordering int + Hoplimit int + InitCwnd int + Features int + RtoMin int + InitRwnd int + QuickACK int + Congctl string + FastOpenNoCookie int } func (r Route) String() string { @@ -61,6 +103,9 @@ func (r Route) String() string { if r.Encap != nil { elems = append(elems, fmt.Sprintf("Encap: %s", r.Encap)) } + if r.Via != nil { + elems = append(elems, fmt.Sprintf("Via: %s", r.Via)) + } elems = append(elems, fmt.Sprintf("Src: %s", r.Src)) if len(r.MultiPath) > 0 { elems = append(elems, fmt.Sprintf("Gw: %s", r.MultiPath)) @@ -69,9 +114,32 @@ func (r Route) String() string { } elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags())) elems = append(elems, fmt.Sprintf("Table: %d", r.Table)) + elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm)) return fmt.Sprintf("{%s}", strings.Join(elems, " ")) } +func (r Route) Equal(x Route) bool { + return r.LinkIndex == x.LinkIndex && + r.ILinkIndex == x.ILinkIndex && + r.Scope == x.Scope && + ipNetEqual(r.Dst, x.Dst) && + r.Src.Equal(x.Src) && + r.Gw.Equal(x.Gw) && + nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) && + r.Protocol == x.Protocol && + r.Priority == x.Priority && + r.Realm == x.Realm && + r.Table == x.Table && + r.Type == x.Type && + r.Tos == x.Tos && + r.Hoplimit == x.Hoplimit && + r.Flags == x.Flags && + (r.MPLSDst == x.MPLSDst || (r.MPLSDst != nil && x.MPLSDst != nil && *r.MPLSDst == *x.MPLSDst)) && + (r.NewDst == x.NewDst || (r.NewDst != nil && r.NewDst.Equal(x.NewDst))) && + (r.Via == x.Via || (r.Via != nil && r.Via.Equal(x.Via))) && + (r.Encap == x.Encap || (r.Encap != nil && r.Encap.Equal(x.Encap))) +} + func (r *Route) SetFlag(flag NextHopFlag) { r.Flags |= int(flag) } @@ -98,6 +166,7 @@ type NexthopInfo struct { Flags int NewDst Destination Encap Encap + Via Destination } func (n *NexthopInfo) String() string { @@ -109,8 +178,50 @@ func (n *NexthopInfo) String() string { if n.Encap != nil { elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap)) } + if n.Via != nil { + elems = append(elems, fmt.Sprintf("Via: %s", n.Via)) + } elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1)) - elems = append(elems, fmt.Sprintf("Gw: %d", n.Gw)) + elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw)) elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags())) return fmt.Sprintf("{%s}", strings.Join(elems, " ")) } + +func (n NexthopInfo) Equal(x NexthopInfo) bool { + return n.LinkIndex == x.LinkIndex && + n.Hops == x.Hops && + n.Gw.Equal(x.Gw) && + n.Flags == x.Flags && + (n.NewDst == x.NewDst || (n.NewDst != nil && n.NewDst.Equal(x.NewDst))) && + (n.Encap == x.Encap || (n.Encap != nil && n.Encap.Equal(x.Encap))) +} + +type nexthopInfoSlice []*NexthopInfo + +func (n nexthopInfoSlice) Equal(x []*NexthopInfo) bool { + if len(n) != len(x) { + return false + } + for i := range n { + if n[i] == nil || x[i] == nil { + return false + } + if !n[i].Equal(*x[i]) { + return false + } + } + return true +} + +// ipNetEqual returns true iff both IPNet are equal +func ipNetEqual(ipn1 *net.IPNet, ipn2 *net.IPNet) bool { + if ipn1 == ipn2 { + return true + } + if ipn1 == nil || ipn2 == nil { + return false + } + m1, _ := ipn1.Mask.Size() + m2, _ := ipn2.Mask.Size() + return m1 == m2 && ipn1.IP.Equal(ipn2.IP) +} diff --git a/vendor/github.com/vishvananda/netlink/route_linux.go b/vendor/github.com/vishvananda/netlink/route_linux.go index cd739e7146..8da8866573 100644 --- a/vendor/github.com/vishvananda/netlink/route_linux.go +++ b/vendor/github.com/vishvananda/netlink/route_linux.go @@ -1,41 +1,50 @@ package netlink import ( + "bytes" + "encoding/binary" "fmt" "net" + "strconv" "strings" "syscall" "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" + "golang.org/x/sys/unix" ) // RtAttr is shared so it is in netlink_linux.go const ( - SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE - SCOPE_SITE Scope = syscall.RT_SCOPE_SITE - SCOPE_LINK Scope = syscall.RT_SCOPE_LINK - SCOPE_HOST Scope = syscall.RT_SCOPE_HOST - SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE + SCOPE_UNIVERSE Scope = unix.RT_SCOPE_UNIVERSE + SCOPE_SITE Scope = unix.RT_SCOPE_SITE + SCOPE_LINK Scope = unix.RT_SCOPE_LINK + SCOPE_HOST Scope = unix.RT_SCOPE_HOST + SCOPE_NOWHERE Scope = unix.RT_SCOPE_NOWHERE ) -const ( - RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) - RT_FILTER_SCOPE - RT_FILTER_TYPE - RT_FILTER_TOS - RT_FILTER_IIF - RT_FILTER_OIF - RT_FILTER_DST - RT_FILTER_SRC - RT_FILTER_GW - RT_FILTER_TABLE -) +func (s Scope) String() string { + switch s { + case SCOPE_UNIVERSE: + return "universe" + case SCOPE_SITE: + return "site" + case SCOPE_LINK: + return "link" + case SCOPE_HOST: + return "host" + case SCOPE_NOWHERE: + return "nowhere" + default: + return "unknown" + } +} + const ( - FLAG_ONLINK NextHopFlag = syscall.RTNH_F_ONLINK - FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE + FLAG_ONLINK NextHopFlag = unix.RTNH_F_ONLINK + FLAG_PERVASIVE NextHopFlag = unix.RTNH_F_PERVASIVE ) var testFlags = []flagString{ @@ -86,6 +95,34 @@ func (d *MPLSDestination) String() string { return strings.Join(s, "/") } +func (d *MPLSDestination) Equal(x Destination) bool { + o, ok := x.(*MPLSDestination) + if !ok { + return false + } + if d == nil && o == nil { + return true + } + if d == nil || o == nil { + return false + } + if d.Labels == nil && o.Labels == nil { + return true + } + if d.Labels == nil || o.Labels == nil { + return false + } + if len(d.Labels) != len(o.Labels) { + return false + } + for i := range d.Labels { + if d.Labels[i] != o.Labels[i] { + return false + } + } + return true +} + type MPLSEncap struct { Labels []int } @@ -96,17 +133,16 @@ func (e *MPLSEncap) Type() int { func (e *MPLSEncap) Decode(buf []byte) error { if len(buf) < 4 { - return fmt.Errorf("Lack of bytes") + return fmt.Errorf("lack of bytes") } - native := nl.NativeEndian() l := native.Uint16(buf) if len(buf) < int(l) { - return fmt.Errorf("Lack of bytes") + return fmt.Errorf("lack of bytes") } buf = buf[:l] typ := native.Uint16(buf[2:]) if typ != nl.MPLS_IPTUNNEL_DST { - return fmt.Errorf("Unknown MPLS Encap Type: %d", typ) + return fmt.Errorf("unknown MPLS Encap Type: %d", typ) } e.Labels = nl.DecodeMPLSStack(buf[4:]) return nil @@ -114,7 +150,6 @@ func (e *MPLSEncap) Decode(buf []byte) error { func (e *MPLSEncap) Encode() ([]byte, error) { s := nl.EncodeMPLSStack(e.Labels...) - native := nl.NativeEndian() hdr := make([]byte, 4) native.PutUint16(hdr, uint16(len(s)+4)) native.PutUint16(hdr[2:], nl.MPLS_IPTUNNEL_DST) @@ -129,6 +164,487 @@ func (e *MPLSEncap) String() string { return strings.Join(s, "/") } +func (e *MPLSEncap) Equal(x Encap) bool { + o, ok := x.(*MPLSEncap) + if !ok { + return false + } + if e == nil && o == nil { + return true + } + if e == nil || o == nil { + return false + } + if e.Labels == nil && o.Labels == nil { + return true + } + if e.Labels == nil || o.Labels == nil { + return false + } + if len(e.Labels) != len(o.Labels) { + return false + } + for i := range e.Labels { + if e.Labels[i] != o.Labels[i] { + return false + } + } + return true +} + +// SEG6 definitions +type SEG6Encap struct { + Mode int + Segments []net.IP +} + +func (e *SEG6Encap) Type() int { + return nl.LWTUNNEL_ENCAP_SEG6 +} +func (e *SEG6Encap) Decode(buf []byte) error { + if len(buf) < 4 { + return fmt.Errorf("lack of bytes") + } + // Get Length(l) & Type(typ) : 2 + 2 bytes + l := native.Uint16(buf) + if len(buf) < int(l) { + return fmt.Errorf("lack of bytes") + } + buf = buf[:l] // make sure buf size upper limit is Length + typ := native.Uint16(buf[2:]) + // LWTUNNEL_ENCAP_SEG6 has only one attr type SEG6_IPTUNNEL_SRH + if typ != nl.SEG6_IPTUNNEL_SRH { + return fmt.Errorf("unknown SEG6 Type: %d", typ) + } + + var err error + e.Mode, e.Segments, err = nl.DecodeSEG6Encap(buf[4:]) + + return err +} +func (e *SEG6Encap) Encode() ([]byte, error) { + s, err := nl.EncodeSEG6Encap(e.Mode, e.Segments) + hdr := make([]byte, 4) + native.PutUint16(hdr, uint16(len(s)+4)) + native.PutUint16(hdr[2:], nl.SEG6_IPTUNNEL_SRH) + return append(hdr, s...), err +} +func (e *SEG6Encap) String() string { + segs := make([]string, 0, len(e.Segments)) + // append segment backwards (from n to 0) since seg#0 is the last segment. + for i := len(e.Segments); i > 0; i-- { + segs = append(segs, e.Segments[i-1].String()) + } + str := fmt.Sprintf("mode %s segs %d [ %s ]", nl.SEG6EncapModeString(e.Mode), + len(e.Segments), strings.Join(segs, " ")) + return str +} +func (e *SEG6Encap) Equal(x Encap) bool { + o, ok := x.(*SEG6Encap) + if !ok { + return false + } + if e == o { + return true + } + if e == nil || o == nil { + return false + } + if e.Mode != o.Mode { + return false + } + if len(e.Segments) != len(o.Segments) { + return false + } + for i := range e.Segments { + if !e.Segments[i].Equal(o.Segments[i]) { + return false + } + } + return true +} + +// SEG6LocalEncap definitions +type SEG6LocalEncap struct { + Flags [nl.SEG6_LOCAL_MAX]bool + Action int + Segments []net.IP // from SRH in seg6_local_lwt + Table int // table id for End.T and End.DT6 + InAddr net.IP + In6Addr net.IP + Iif int + Oif int +} + +func (e *SEG6LocalEncap) Type() int { + return nl.LWTUNNEL_ENCAP_SEG6_LOCAL +} +func (e *SEG6LocalEncap) Decode(buf []byte) error { + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return err + } + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.SEG6_LOCAL_ACTION: + e.Action = int(native.Uint32(attr.Value[0:4])) + e.Flags[nl.SEG6_LOCAL_ACTION] = true + case nl.SEG6_LOCAL_SRH: + e.Segments, err = nl.DecodeSEG6Srh(attr.Value[:]) + e.Flags[nl.SEG6_LOCAL_SRH] = true + case nl.SEG6_LOCAL_TABLE: + e.Table = int(native.Uint32(attr.Value[0:4])) + e.Flags[nl.SEG6_LOCAL_TABLE] = true + case nl.SEG6_LOCAL_NH4: + e.InAddr = net.IP(attr.Value[0:4]) + e.Flags[nl.SEG6_LOCAL_NH4] = true + case nl.SEG6_LOCAL_NH6: + e.In6Addr = net.IP(attr.Value[0:16]) + e.Flags[nl.SEG6_LOCAL_NH6] = true + case nl.SEG6_LOCAL_IIF: + e.Iif = int(native.Uint32(attr.Value[0:4])) + e.Flags[nl.SEG6_LOCAL_IIF] = true + case nl.SEG6_LOCAL_OIF: + e.Oif = int(native.Uint32(attr.Value[0:4])) + e.Flags[nl.SEG6_LOCAL_OIF] = true + } + } + return err +} +func (e *SEG6LocalEncap) Encode() ([]byte, error) { + var err error + res := make([]byte, 8) + native.PutUint16(res, 8) // length + native.PutUint16(res[2:], nl.SEG6_LOCAL_ACTION) + native.PutUint32(res[4:], uint32(e.Action)) + if e.Flags[nl.SEG6_LOCAL_SRH] { + srh, err := nl.EncodeSEG6Srh(e.Segments) + if err != nil { + return nil, err + } + attr := make([]byte, 4) + native.PutUint16(attr, uint16(len(srh)+4)) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_SRH) + attr = append(attr, srh...) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_TABLE] { + attr := make([]byte, 8) + native.PutUint16(attr, 8) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_TABLE) + native.PutUint32(attr[4:], uint32(e.Table)) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_NH4] { + attr := make([]byte, 4) + native.PutUint16(attr, 8) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_NH4) + ipv4 := e.InAddr.To4() + if ipv4 == nil { + err = fmt.Errorf("SEG6_LOCAL_NH4 has invalid IPv4 address") + return nil, err + } + attr = append(attr, ipv4...) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_NH6] { + attr := make([]byte, 4) + native.PutUint16(attr, 20) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_NH6) + attr = append(attr, e.In6Addr...) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_IIF] { + attr := make([]byte, 8) + native.PutUint16(attr, 8) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_IIF) + native.PutUint32(attr[4:], uint32(e.Iif)) + res = append(res, attr...) + } + if e.Flags[nl.SEG6_LOCAL_OIF] { + attr := make([]byte, 8) + native.PutUint16(attr, 8) + native.PutUint16(attr[2:], nl.SEG6_LOCAL_OIF) + native.PutUint32(attr[4:], uint32(e.Oif)) + res = append(res, attr...) + } + return res, err +} +func (e *SEG6LocalEncap) String() string { + strs := make([]string, 0, nl.SEG6_LOCAL_MAX) + strs = append(strs, fmt.Sprintf("action %s", nl.SEG6LocalActionString(e.Action))) + + if e.Flags[nl.SEG6_LOCAL_TABLE] { + strs = append(strs, fmt.Sprintf("table %d", e.Table)) + } + if e.Flags[nl.SEG6_LOCAL_NH4] { + strs = append(strs, fmt.Sprintf("nh4 %s", e.InAddr)) + } + if e.Flags[nl.SEG6_LOCAL_NH6] { + strs = append(strs, fmt.Sprintf("nh6 %s", e.In6Addr)) + } + if e.Flags[nl.SEG6_LOCAL_IIF] { + link, err := LinkByIndex(e.Iif) + if err != nil { + strs = append(strs, fmt.Sprintf("iif %d", e.Iif)) + } else { + strs = append(strs, fmt.Sprintf("iif %s", link.Attrs().Name)) + } + } + if e.Flags[nl.SEG6_LOCAL_OIF] { + link, err := LinkByIndex(e.Oif) + if err != nil { + strs = append(strs, fmt.Sprintf("oif %d", e.Oif)) + } else { + strs = append(strs, fmt.Sprintf("oif %s", link.Attrs().Name)) + } + } + if e.Flags[nl.SEG6_LOCAL_SRH] { + segs := make([]string, 0, len(e.Segments)) + //append segment backwards (from n to 0) since seg#0 is the last segment. + for i := len(e.Segments); i > 0; i-- { + segs = append(segs, e.Segments[i-1].String()) + } + strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " "))) + } + return strings.Join(strs, " ") +} +func (e *SEG6LocalEncap) Equal(x Encap) bool { + o, ok := x.(*SEG6LocalEncap) + if !ok { + return false + } + if e == o { + return true + } + if e == nil || o == nil { + return false + } + // compare all arrays first + for i := range e.Flags { + if e.Flags[i] != o.Flags[i] { + return false + } + } + if len(e.Segments) != len(o.Segments) { + return false + } + for i := range e.Segments { + if !e.Segments[i].Equal(o.Segments[i]) { + return false + } + } + // compare values + if !e.InAddr.Equal(o.InAddr) || !e.In6Addr.Equal(o.In6Addr) { + return false + } + if e.Action != o.Action || e.Table != o.Table || e.Iif != o.Iif || e.Oif != o.Oif { + return false + } + return true +} + +// Encap BPF definitions +type bpfObj struct { + progFd int + progName string +} +type BpfEncap struct { + progs [nl.LWT_BPF_MAX]bpfObj + headroom int +} + +// SetProg adds a bpf function to the route via netlink RTA_ENCAP. The fd must be a bpf +// program loaded with bpf(type=BPF_PROG_TYPE_LWT_*) matching the direction the program should +// be applied to (LWT_BPF_IN, LWT_BPF_OUT, LWT_BPF_XMIT). +func (e *BpfEncap) SetProg(mode, progFd int, progName string) error { + if progFd <= 0 { + return fmt.Errorf("lwt bpf SetProg: invalid fd") + } + if mode <= nl.LWT_BPF_UNSPEC || mode >= nl.LWT_BPF_XMIT_HEADROOM { + return fmt.Errorf("lwt bpf SetProg:invalid mode") + } + e.progs[mode].progFd = progFd + e.progs[mode].progName = fmt.Sprintf("%s[fd:%d]", progName, progFd) + return nil +} + +// SetXmitHeadroom sets the xmit headroom (LWT_BPF_MAX_HEADROOM) via netlink RTA_ENCAP. +// maximum headroom is LWT_BPF_MAX_HEADROOM +func (e *BpfEncap) SetXmitHeadroom(headroom int) error { + if headroom > nl.LWT_BPF_MAX_HEADROOM || headroom < 0 { + return fmt.Errorf("invalid headroom size. range is 0 - %d", nl.LWT_BPF_MAX_HEADROOM) + } + e.headroom = headroom + return nil +} + +func (e *BpfEncap) Type() int { + return nl.LWTUNNEL_ENCAP_BPF +} +func (e *BpfEncap) Decode(buf []byte) error { + if len(buf) < 4 { + return fmt.Errorf("lwt bpf decode: lack of bytes") + } + native := nl.NativeEndian() + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing attribute. err: %v", err) + } + for _, attr := range attrs { + if int(attr.Attr.Type) < 1 { + // nl.LWT_BPF_UNSPEC + continue + } + if int(attr.Attr.Type) > nl.LWT_BPF_MAX { + return fmt.Errorf("lwt bpf decode: received unknown attribute type: %d", attr.Attr.Type) + } + switch int(attr.Attr.Type) { + case nl.LWT_BPF_MAX_HEADROOM: + e.headroom = int(native.Uint32(attr.Value)) + default: + bpfO := bpfObj{} + parsedAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing route attribute") + } + for _, parsedAttr := range parsedAttrs { + switch int(parsedAttr.Attr.Type) { + case nl.LWT_BPF_PROG_FD: + bpfO.progFd = int(native.Uint32(parsedAttr.Value)) + case nl.LWT_BPF_PROG_NAME: + bpfO.progName = string(parsedAttr.Value) + default: + return fmt.Errorf("lwt bpf decode: received unknown attribute: type: %d, len: %d", parsedAttr.Attr.Type, parsedAttr.Attr.Len) + } + } + e.progs[attr.Attr.Type] = bpfO + } + } + return nil +} + +func (e *BpfEncap) Encode() ([]byte, error) { + buf := make([]byte, 0) + native = nl.NativeEndian() + for index, attr := range e.progs { + nlMsg := nl.NewRtAttr(index, []byte{}) + if attr.progFd != 0 { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(attr.progFd))) + } + if attr.progName != "" { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(attr.progName)) + } + if nlMsg.Len() > 4 { + buf = append(buf, nlMsg.Serialize()...) + } + } + if len(buf) <= 4 { + return nil, fmt.Errorf("lwt bpf encode: bpf obj definitions returned empty buffer") + } + if e.headroom > 0 { + hRoom := nl.NewRtAttr(nl.LWT_BPF_XMIT_HEADROOM, nl.Uint32Attr(uint32(e.headroom))) + buf = append(buf, hRoom.Serialize()...) + } + return buf, nil +} + +func (e *BpfEncap) String() string { + progs := make([]string, 0) + for index, obj := range e.progs { + empty := bpfObj{} + switch index { + case nl.LWT_BPF_IN: + if obj != empty { + progs = append(progs, fmt.Sprintf("in: %s", obj.progName)) + } + case nl.LWT_BPF_OUT: + if obj != empty { + progs = append(progs, fmt.Sprintf("out: %s", obj.progName)) + } + case nl.LWT_BPF_XMIT: + if obj != empty { + progs = append(progs, fmt.Sprintf("xmit: %s", obj.progName)) + } + } + } + if e.headroom > 0 { + progs = append(progs, fmt.Sprintf("xmit headroom: %d", e.headroom)) + } + return strings.Join(progs, " ") +} + +func (e *BpfEncap) Equal(x Encap) bool { + o, ok := x.(*BpfEncap) + if !ok { + return false + } + if e.headroom != o.headroom { + return false + } + for i := range o.progs { + if o.progs[i] != e.progs[i] { + return false + } + } + return true +} + +type Via struct { + AddrFamily int + Addr net.IP +} + +func (v *Via) Equal(x Destination) bool { + o, ok := x.(*Via) + if !ok { + return false + } + if v.AddrFamily == x.Family() && v.Addr.Equal(o.Addr) { + return true + } + return false +} + +func (v *Via) String() string { + return fmt.Sprintf("Family: %d, Address: %s", v.AddrFamily, v.Addr.String()) +} + +func (v *Via) Family() int { + return v.AddrFamily +} + +func (v *Via) Encode() ([]byte, error) { + buf := &bytes.Buffer{} + err := binary.Write(buf, native, uint16(v.AddrFamily)) + if err != nil { + return nil, err + } + err = binary.Write(buf, native, v.Addr) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func (v *Via) Decode(b []byte) error { + if len(b) < 6 { + return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b)) + } + v.AddrFamily = int(native.Uint16(b[0:2])) + if v.AddrFamily == nl.FAMILY_V4 { + v.Addr = net.IP(b[2:6]) + return nil + } else if v.AddrFamily == nl.FAMILY_V6 { + if len(b) < 18 { + return fmt.Errorf("decoding failed: buffer too small (%d bytes)", len(b)) + } + v.Addr = net.IP(b[2:]) + return nil + } + return fmt.Errorf("decoding failed: address family %d unknown", v.AddrFamily) +} + // RouteAdd will add a route to the system. // Equivalent to: `ip route add $route` func RouteAdd(route *Route) error { @@ -138,8 +654,34 @@ func RouteAdd(route *Route) error { // RouteAdd will add a route to the system. // Equivalent to: `ip route add $route` func (h *Handle) RouteAdd(route *Route) error { - flags := syscall.NLM_F_CREATE | syscall.NLM_F_EXCL | syscall.NLM_F_ACK - req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, flags) + flags := unix.NLM_F_CREATE | unix.NLM_F_EXCL | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + return h.routeHandle(route, req, nl.NewRtMsg()) +} + +// RouteAppend will append a route to the system. +// Equivalent to: `ip route append $route` +func RouteAppend(route *Route) error { + return pkgHandle.RouteAppend(route) +} + +// RouteAppend will append a route to the system. +// Equivalent to: `ip route append $route` +func (h *Handle) RouteAppend(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_APPEND | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) + return h.routeHandle(route, req, nl.NewRtMsg()) +} + +// RouteAddEcmp will add a route to the system. +func RouteAddEcmp(route *Route) error { + return pkgHandle.RouteAddEcmp(route) +} + +// RouteAddEcmp will add a route to the system. +func (h *Handle) RouteAddEcmp(route *Route) error { + flags := unix.NLM_F_CREATE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) return h.routeHandle(route, req, nl.NewRtMsg()) } @@ -152,8 +694,8 @@ func RouteReplace(route *Route) error { // RouteReplace will add a route to the system. // Equivalent to: `ip route replace $route` func (h *Handle) RouteReplace(route *Route) error { - flags := syscall.NLM_F_CREATE | syscall.NLM_F_REPLACE | syscall.NLM_F_ACK - req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, flags) + flags := unix.NLM_F_CREATE | unix.NLM_F_REPLACE | unix.NLM_F_ACK + req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) return h.routeHandle(route, req, nl.NewRtMsg()) } @@ -166,7 +708,7 @@ func RouteDel(route *Route) error { // RouteDel will delete a route from the system. // Equivalent to: `ip route del $route` func (h *Handle) RouteDel(route *Route) error { - req := h.newNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_DELROUTE, unix.NLM_F_ACK) return h.routeHandle(route, req, nl.NewRtDelMsg()) } @@ -189,12 +731,12 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg } else { dstData = route.Dst.IP.To16() } - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, dstData)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, dstData)) } else if route.MPLSDst != nil { family = nl.FAMILY_MPLS msg.Dst_len = uint8(20) - msg.Type = syscall.RTN_UNICAST - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, nl.EncodeMPLSStack(*route.MPLSDst))) + msg.Type = unix.RTN_UNICAST + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, nl.EncodeMPLSStack(*route.MPLSDst))) } if route.NewDst != nil { @@ -205,18 +747,24 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if err != nil { return err } - rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_NEWDST, buf)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_NEWDST, buf)) } if route.Encap != nil { buf := make([]byte, 2) native.PutUint16(buf, uint16(route.Encap.Type())) - rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP_TYPE, buf)) buf, err := route.Encap.Encode() if err != nil { return err } - rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP, buf)) + switch route.Encap.Type() { + case nl.LWTUNNEL_ENCAP_BPF: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP|unix.NLA_F_NESTED, buf)) + default: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + } + } if route.Src != nil { @@ -232,7 +780,7 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg srcData = route.Src.To16() } // The commonly used src ip for routes is actually PREFSRC - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_PREFSRC, srcData)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PREFSRC, srcData)) } if route.Gw != nil { @@ -247,14 +795,22 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg } else { gwData = route.Gw.To16() } - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_GATEWAY, gwData)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_GATEWAY, gwData)) + } + + if route.Via != nil { + buf, err := route.Via.Encode() + if err != nil { + return fmt.Errorf("failed to encode RTA_VIA: %v", err) + } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_VIA, buf)) } if len(route.MultiPath) > 0 { buf := []byte{} for _, nh := range route.MultiPath { rtnh := &nl.RtNexthop{ - RtNexthop: syscall.RtNexthop{ + RtNexthop: unix.RtNexthop{ Hops: uint8(nh.Hops), Ifindex: int32(nh.LinkIndex), Flags: uint8(nh.Flags), @@ -267,9 +823,9 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg return fmt.Errorf("gateway, source, and destination ip are not the same IP family") } if gwFamily == FAMILY_V4 { - children = append(children, nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4()))) + children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To4()))) } else { - children = append(children, nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16()))) + children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To16()))) } } if nh.NewDst != nil { @@ -280,30 +836,37 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if err != nil { return err } - children = append(children, nl.NewRtAttr(nl.RTA_NEWDST, buf)) + children = append(children, nl.NewRtAttr(unix.RTA_NEWDST, buf)) } if nh.Encap != nil { buf := make([]byte, 2) native.PutUint16(buf, uint16(nh.Encap.Type())) - rtAttrs = append(rtAttrs, nl.NewRtAttr(nl.RTA_ENCAP_TYPE, buf)) + children = append(children, nl.NewRtAttr(unix.RTA_ENCAP_TYPE, buf)) buf, err := nh.Encap.Encode() if err != nil { return err } - children = append(children, nl.NewRtAttr(nl.RTA_ENCAP, buf)) + children = append(children, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + } + if nh.Via != nil { + buf, err := nh.Via.Encode() + if err != nil { + return err + } + children = append(children, nl.NewRtAttr(unix.RTA_VIA, buf)) } rtnh.Children = children buf = append(buf, rtnh.Serialize()...) } - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_MULTIPATH, buf)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_MULTIPATH, buf)) } if route.Table > 0 { if route.Table >= 256 { - msg.Table = syscall.RT_TABLE_UNSPEC + msg.Table = unix.RT_TABLE_UNSPEC b := make([]byte, 4) native.PutUint32(b, uint32(route.Table)) - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_TABLE, b)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_TABLE, b)) } else { msg.Table = uint8(route.Table) } @@ -312,7 +875,12 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if route.Priority > 0 { b := make([]byte, 4) native.PutUint32(b, uint32(route.Priority)) - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_PRIORITY, b)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b)) + } + if route.Realm > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.Realm)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_FLOW, b)) } if route.Tos > 0 { msg.Tos = uint8(route.Tos) @@ -324,6 +892,80 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg msg.Type = uint8(route.Type) } + var metrics []*nl.RtAttr + if route.MTU > 0 { + b := nl.Uint32Attr(uint32(route.MTU)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_MTU, b)) + } + if route.Window > 0 { + b := nl.Uint32Attr(uint32(route.Window)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_WINDOW, b)) + } + if route.Rtt > 0 { + b := nl.Uint32Attr(uint32(route.Rtt)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTT, b)) + } + if route.RttVar > 0 { + b := nl.Uint32Attr(uint32(route.RttVar)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTTVAR, b)) + } + if route.Ssthresh > 0 { + b := nl.Uint32Attr(uint32(route.Ssthresh)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_SSTHRESH, b)) + } + if route.Cwnd > 0 { + b := nl.Uint32Attr(uint32(route.Cwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CWND, b)) + } + if route.AdvMSS > 0 { + b := nl.Uint32Attr(uint32(route.AdvMSS)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_ADVMSS, b)) + } + if route.Reordering > 0 { + b := nl.Uint32Attr(uint32(route.Reordering)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_REORDERING, b)) + } + if route.Hoplimit > 0 { + b := nl.Uint32Attr(uint32(route.Hoplimit)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_HOPLIMIT, b)) + } + if route.InitCwnd > 0 { + b := nl.Uint32Attr(uint32(route.InitCwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITCWND, b)) + } + if route.Features > 0 { + b := nl.Uint32Attr(uint32(route.Features)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FEATURES, b)) + } + if route.RtoMin > 0 { + b := nl.Uint32Attr(uint32(route.RtoMin)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_RTO_MIN, b)) + } + if route.InitRwnd > 0 { + b := nl.Uint32Attr(uint32(route.InitRwnd)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_INITRWND, b)) + } + if route.QuickACK > 0 { + b := nl.Uint32Attr(uint32(route.QuickACK)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_QUICKACK, b)) + } + if route.Congctl != "" { + b := nl.ZeroTerminated(route.Congctl) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_CC_ALGO, b)) + } + if route.FastOpenNoCookie > 0 { + b := nl.Uint32Attr(uint32(route.FastOpenNoCookie)) + metrics = append(metrics, nl.NewRtAttr(unix.RTAX_FASTOPEN_NO_COOKIE, b)) + } + + if metrics != nil { + attr := nl.NewRtAttr(unix.RTA_METRICS, nil) + for _, metric := range metrics { + attr.AddChild(metric) + } + rtAttrs = append(rtAttrs, attr) + } + msg.Flags = uint32(route.Flags) msg.Scope = uint8(route.Scope) msg.Family = uint8(family) @@ -332,15 +974,12 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg req.AddData(attr) } - var ( - b = make([]byte, 4) - native = nl.NativeEndian() - ) + b := make([]byte, 4) native.PutUint32(b, uint32(route.LinkIndex)) - req.AddData(nl.NewRtAttr(syscall.RTA_OIF, b)) + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -373,11 +1012,12 @@ func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, e // RouteListFiltered gets a list of routes in the system filtered with specified rules. // All rules must be defined in RouteFilter struct func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { - req := h.newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP) - infmsg := nl.NewIfInfomsg(family) - req.AddData(infmsg) + req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) + rtmsg := nl.NewRtMsg() + rtmsg.Family = uint8(family) + req.AddData(rtmsg) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) if err != nil { return nil, err } @@ -385,11 +1025,11 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) var res []Route for _, m := range msgs { msg := nl.DeserializeRtMsg(m) - if msg.Flags&syscall.RTM_F_CLONED != 0 { + if msg.Flags&unix.RTM_F_CLONED != 0 { // Ignore cloned routes continue } - if msg.Table != syscall.RT_TABLE_MAIN { + if msg.Table != unix.RT_TABLE_MAIN { if filter == nil || filter != nil && filterMask&RT_FILTER_TABLE == 0 { // Ignore non-main tables continue @@ -401,7 +1041,7 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) } if filter != nil { switch { - case filterMask&RT_FILTER_TABLE != 0 && filter.Table != syscall.RT_TABLE_UNSPEC && route.Table != filter.Table: + case filterMask&RT_FILTER_TABLE != 0 && filter.Table != unix.RT_TABLE_UNSPEC && route.Table != filter.Table: continue case filterMask&RT_FILTER_PROTOCOL != 0 && route.Protocol != filter.Protocol: continue @@ -411,6 +1051,8 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) continue case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos: continue + case filterMask&RT_FILTER_REALM != 0 && route.Realm != filter.Realm: + continue case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex: continue case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex: @@ -421,21 +1063,12 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) continue case filterMask&RT_FILTER_DST != 0: if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) { - if filter.Dst == nil { - if route.Dst != nil { - continue - } - } else { - if route.Dst == nil { - continue - } - aMaskLen, aMaskBits := route.Dst.Mask.Size() - bMaskLen, bMaskBits := filter.Dst.Mask.Size() - if !(route.Dst.IP.Equal(filter.Dst.IP) && aMaskLen == bMaskLen && aMaskBits == bMaskBits) { - continue - } + if !ipNetEqual(route.Dst, filter.Dst) { + continue } } + case filterMask&RT_FILTER_HOPLIMIT != 0 && route.Hoplimit != filter.Hoplimit: + continue } } res = append(res, route) @@ -452,22 +1085,22 @@ func deserializeRoute(m []byte) (Route, error) { } route := Route{ Scope: Scope(msg.Scope), - Protocol: int(msg.Protocol), + Protocol: RouteProtocol(int(msg.Protocol)), Table: int(msg.Table), Type: int(msg.Type), Tos: int(msg.Tos), Flags: int(msg.Flags), + Family: int(msg.Family), } - native := nl.NativeEndian() var encap, encapType syscall.NetlinkRouteAttr for _, attr := range attrs { switch attr.Attr.Type { - case syscall.RTA_GATEWAY: + case unix.RTA_GATEWAY: route.Gw = net.IP(attr.Value) - case syscall.RTA_PREFSRC: + case unix.RTA_PREFSRC: route.Src = net.IP(attr.Value) - case syscall.RTA_DST: + case unix.RTA_DST: if msg.Family == nl.FAMILY_MPLS { stack := nl.DecodeMPLSStack(attr.Value) if len(stack) == 0 || len(stack) > 1 { @@ -480,38 +1113,40 @@ func deserializeRoute(m []byte) (Route, error) { Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)), } } - case syscall.RTA_OIF: + case unix.RTA_OIF: route.LinkIndex = int(native.Uint32(attr.Value[0:4])) - case syscall.RTA_IIF: + case unix.RTA_IIF: route.ILinkIndex = int(native.Uint32(attr.Value[0:4])) - case syscall.RTA_PRIORITY: + case unix.RTA_PRIORITY: route.Priority = int(native.Uint32(attr.Value[0:4])) - case syscall.RTA_TABLE: + case unix.RTA_FLOW: + route.Realm = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_TABLE: route.Table = int(native.Uint32(attr.Value[0:4])) - case syscall.RTA_MULTIPATH: + case unix.RTA_MULTIPATH: parseRtNexthop := func(value []byte) (*NexthopInfo, []byte, error) { - if len(value) < syscall.SizeofRtNexthop { - return nil, nil, fmt.Errorf("Lack of bytes") + if len(value) < unix.SizeofRtNexthop { + return nil, nil, fmt.Errorf("lack of bytes") } nh := nl.DeserializeRtNexthop(value) if len(value) < int(nh.RtNexthop.Len) { - return nil, nil, fmt.Errorf("Lack of bytes") + return nil, nil, fmt.Errorf("lack of bytes") } info := &NexthopInfo{ LinkIndex: int(nh.RtNexthop.Ifindex), Hops: int(nh.RtNexthop.Hops), Flags: int(nh.RtNexthop.Flags), } - attrs, err := nl.ParseRouteAttr(value[syscall.SizeofRtNexthop:int(nh.RtNexthop.Len)]) + attrs, err := nl.ParseRouteAttr(value[unix.SizeofRtNexthop:int(nh.RtNexthop.Len)]) if err != nil { return nil, nil, err } var encap, encapType syscall.NetlinkRouteAttr for _, attr := range attrs { switch attr.Attr.Type { - case syscall.RTA_GATEWAY: + case unix.RTA_GATEWAY: info.Gw = net.IP(attr.Value) - case nl.RTA_NEWDST: + case unix.RTA_NEWDST: var d Destination switch msg.Family { case nl.FAMILY_MPLS: @@ -521,10 +1156,16 @@ func deserializeRoute(m []byte) (Route, error) { return nil, nil, err } info.NewDst = d - case nl.RTA_ENCAP_TYPE: + case unix.RTA_ENCAP_TYPE: encapType = attr - case nl.RTA_ENCAP: + case unix.RTA_ENCAP: encap = attr + case unix.RTA_VIA: + d := &Via{} + if err := d.Decode(attr.Value); err != nil { + return nil, nil, err + } + info.Via = d } } @@ -552,7 +1193,7 @@ func deserializeRoute(m []byte) (Route, error) { route.MultiPath = append(route.MultiPath, info) rest = buf } - case nl.RTA_NEWDST: + case unix.RTA_NEWDST: var d Destination switch msg.Family { case nl.FAMILY_MPLS: @@ -562,10 +1203,57 @@ func deserializeRoute(m []byte) (Route, error) { return route, err } route.NewDst = d - case nl.RTA_ENCAP_TYPE: + case unix.RTA_VIA: + v := &Via{} + if err := v.Decode(attr.Value); err != nil { + return route, err + } + route.Via = v + case unix.RTA_ENCAP_TYPE: encapType = attr - case nl.RTA_ENCAP: + case unix.RTA_ENCAP: encap = attr + case unix.RTA_METRICS: + metrics, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return route, err + } + for _, metric := range metrics { + switch metric.Attr.Type { + case unix.RTAX_MTU: + route.MTU = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_WINDOW: + route.Window = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTT: + route.Rtt = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTTVAR: + route.RttVar = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_SSTHRESH: + route.Ssthresh = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_CWND: + route.Cwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_ADVMSS: + route.AdvMSS = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_REORDERING: + route.Reordering = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_HOPLIMIT: + route.Hoplimit = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_INITCWND: + route.InitCwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_FEATURES: + route.Features = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_RTO_MIN: + route.RtoMin = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_INITRWND: + route.InitRwnd = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_QUICKACK: + route.QuickACK = int(native.Uint32(metric.Value[0:4])) + case unix.RTAX_CC_ALGO: + route.Congctl = nl.BytesToString(metric.Value) + case unix.RTAX_FASTOPEN_NO_COOKIE: + route.FastOpenNoCookie = int(native.Uint32(metric.Value[0:4])) + } + } } } @@ -578,6 +1266,21 @@ func deserializeRoute(m []byte) (Route, error) { if err := e.Decode(encap.Value); err != nil { return route, err } + case nl.LWTUNNEL_ENCAP_SEG6: + e = &SEG6Encap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } + case nl.LWTUNNEL_ENCAP_SEG6_LOCAL: + e = &SEG6LocalEncap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } + case nl.LWTUNNEL_ENCAP_BPF: + e = &BpfEncap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } } route.Encap = e } @@ -585,16 +1288,31 @@ func deserializeRoute(m []byte) (Route, error) { return route, nil } +// RouteGetOptions contains a set of options to use with +// RouteGetWithOptions +type RouteGetOptions struct { + Iif string + Oif string + VrfName string + SrcAddr net.IP +} + +// RouteGetWithOptions gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get <> vrf '. +func RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) { + return pkgHandle.RouteGetWithOptions(destination, options) +} + // RouteGet gets a route to a specific destination from the host system. // Equivalent to: 'ip route get'. func RouteGet(destination net.IP) ([]Route, error) { return pkgHandle.RouteGet(destination) } -// RouteGet gets a route to a specific destination from the host system. -// Equivalent to: 'ip route get'. -func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { - req := h.newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_REQUEST) +// RouteGetWithOptions gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get <> vrf '. +func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOptions) ([]Route, error) { + req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_REQUEST) family := nl.GetIPFamily(destination) var destinationData []byte var bitlen uint8 @@ -608,12 +1326,64 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { msg := &nl.RtMsg{} msg.Family = uint8(family) msg.Dst_len = bitlen + if options != nil && options.SrcAddr != nil { + msg.Src_len = bitlen + } + msg.Flags = unix.RTM_F_LOOKUP_TABLE req.AddData(msg) - rtaDst := nl.NewRtAttr(syscall.RTA_DST, destinationData) + rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData) req.AddData(rtaDst) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE) + if options != nil { + if options.VrfName != "" { + link, err := LinkByName(options.VrfName) + if err != nil { + return nil, err + } + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + + if len(options.Iif) > 0 { + link, err := LinkByName(options.Iif) + if err != nil { + return nil, err + } + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_IIF, b)) + } + + if len(options.Oif) > 0 { + link, err := LinkByName(options.Oif) + if err != nil { + return nil, err + } + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + + if options.SrcAddr != nil { + var srcAddr []byte + if family == FAMILY_V4 { + srcAddr = options.SrcAddr.To4() + } else { + srcAddr = options.SrcAddr.To16() + } + + req.AddData(nl.NewRtAttr(unix.RTA_SRC, srcAddr)) + } + } + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) if err != nil { return nil, err } @@ -627,23 +1397,47 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { res = append(res, route) } return res, nil +} +// RouteGet gets a route to a specific destination from the host system. +// Equivalent to: 'ip route get'. +func (h *Handle) RouteGet(destination net.IP) ([]Route, error) { + return h.RouteGetWithOptions(destination, nil) } // RouteSubscribe takes a chan down which notifications will be sent // when routes are added or deleted. Close the 'done' chan to stop subscription. func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(netns.None(), netns.None(), ch, done) + return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil, false) } // RouteSubscribeAt works like RouteSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error { - return routeSubscribeAt(ns, netns.None(), ch, done) + return routeSubscribeAt(ns, netns.None(), ch, done, nil, false) +} + +// RouteSubscribeOptions contains a set of options to use with +// RouteSubscribeWithOptions. +type RouteSubscribeOptions struct { + Namespace *netns.NsHandle + ErrorCallback func(error) + ListExisting bool } -func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error { - s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE) +// RouteSubscribeWithOptions work like RouteSubscribe but enable to +// provide additional options to modify the behavior. Currently, the +// namespace can be provided as well as an error callback. +func RouteSubscribeWithOptions(ch chan<- RouteUpdate, done <-chan struct{}, options RouteSubscribeOptions) error { + if options.Namespace == nil { + none := netns.None() + options.Namespace = &none + } + return routeSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting) +} + +func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { + s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_ROUTE, unix.RTNLGRP_IPV6_ROUTE) if err != nil { return err } @@ -653,17 +1447,53 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < s.Close() }() } + if listExisting { + req := pkgHandle.newNetlinkRequest(unix.RTM_GETROUTE, + unix.NLM_F_DUMP) + infmsg := nl.NewIfInfomsg(unix.AF_UNSPEC) + req.AddData(infmsg) + if err := s.Send(req); err != nil { + return err + } + } go func() { defer close(ch) for { - msgs, err := s.Receive() + msgs, from, err := s.Receive() if err != nil { + if cberr != nil { + cberr(fmt.Errorf("Receive failed: %v", + err)) + } return } + if from.Pid != nl.PidKernel { + if cberr != nil { + cberr(fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)) + } + continue + } for _, m := range msgs { + if m.Header.Type == unix.NLMSG_DONE { + continue + } + if m.Header.Type == unix.NLMSG_ERROR { + error := int32(native.Uint32(m.Data[0:4])) + if error == 0 { + continue + } + if cberr != nil { + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) + } + continue + } route, err := deserializeRoute(m.Data) if err != nil { - return + if cberr != nil { + cberr(err) + } + continue } ch <- RouteUpdate{Type: m.Header.Type, Route: route} } @@ -672,3 +1502,54 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < return nil } + +func (p RouteProtocol) String() string { + switch int(p) { + case unix.RTPROT_BABEL: + return "babel" + case unix.RTPROT_BGP: + return "bgp" + case unix.RTPROT_BIRD: + return "bird" + case unix.RTPROT_BOOT: + return "boot" + case unix.RTPROT_DHCP: + return "dhcp" + case unix.RTPROT_DNROUTED: + return "dnrouted" + case unix.RTPROT_EIGRP: + return "eigrp" + case unix.RTPROT_GATED: + return "gated" + case unix.RTPROT_ISIS: + return "isis" + //case unix.RTPROT_KEEPALIVED: + // return "keepalived" + case unix.RTPROT_KERNEL: + return "kernel" + case unix.RTPROT_MROUTED: + return "mrouted" + case unix.RTPROT_MRT: + return "mrt" + case unix.RTPROT_NTK: + return "ntk" + case unix.RTPROT_OSPF: + return "ospf" + case unix.RTPROT_RA: + return "ra" + case unix.RTPROT_REDIRECT: + return "redirect" + case unix.RTPROT_RIP: + return "rip" + case unix.RTPROT_STATIC: + return "static" + case unix.RTPROT_UNSPEC: + return "unspec" + case unix.RTPROT_XORP: + return "xorp" + case unix.RTPROT_ZEBRA: + return "zebra" + default: + return strconv.Itoa(int(p)) + } +} diff --git a/vendor/github.com/vishvananda/netlink/route_unspecified.go b/vendor/github.com/vishvananda/netlink/route_unspecified.go index 2701862b4b..db73726895 100644 --- a/vendor/github.com/vishvananda/netlink/route_unspecified.go +++ b/vendor/github.com/vishvananda/netlink/route_unspecified.go @@ -2,6 +2,8 @@ package netlink +import "strconv" + func (r *Route) ListFlags() []string { return []string{} } @@ -9,3 +11,11 @@ func (r *Route) ListFlags() []string { func (n *NexthopInfo) ListFlags() []string { return []string{} } + +func (s Scope) String() string { + return "unknown" +} + +func (p RouteProtocol) String() string { + return strconv.Itoa(int(p)) +} diff --git a/vendor/github.com/vishvananda/netlink/rule.go b/vendor/github.com/vishvananda/netlink/rule.go index f0243defd7..53cd3d4f6a 100644 --- a/vendor/github.com/vishvananda/netlink/rule.go +++ b/vendor/github.com/vishvananda/netlink/rule.go @@ -8,9 +8,11 @@ import ( // Rule represents a netlink rule. type Rule struct { Priority int + Family int Table int Mark int Mask int + Tos uint TunID uint Goto int Src *net.IPNet @@ -20,10 +22,25 @@ type Rule struct { OifName string SuppressIfgroup int SuppressPrefixlen int + Invert bool + Dport *RulePortRange + Sport *RulePortRange + IPProto int } func (r Rule) String() string { - return fmt.Sprintf("ip rule %d: from %s table %d", r.Priority, r.Src, r.Table) + from := "all" + if r.Src != nil && r.Src.String() != "" { + from = r.Src.String() + } + + to := "all" + if r.Dst != nil && r.Dst.String() != "" { + to = r.Dst.String() + } + + return fmt.Sprintf("ip rule %d: from %s to %s table %d", + r.Priority, from, to, r.Table) } // NewRule return empty rules. @@ -38,3 +55,14 @@ func NewRule() *Rule { Flow: -1, } } + +// NewRulePortRange creates rule sport/dport range. +func NewRulePortRange(start, end uint16) *RulePortRange { + return &RulePortRange{Start: start, End: end} +} + +// RulePortRange represents rule sport/dport range. +type RulePortRange struct { + Start uint16 + End uint16 +} diff --git a/vendor/github.com/vishvananda/netlink/rule_linux.go b/vendor/github.com/vishvananda/netlink/rule_linux.go index f9cdc855f1..3ae2138808 100644 --- a/vendor/github.com/vishvananda/netlink/rule_linux.go +++ b/vendor/github.com/vishvananda/netlink/rule_linux.go @@ -1,13 +1,16 @@ package netlink import ( + "bytes" "fmt" "net" - "syscall" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) +const FibRuleInvert = 0x2 + // RuleAdd adds a rule to the system. // Equivalent to: ip rule add func RuleAdd(rule *Rule) error { @@ -17,7 +20,7 @@ func RuleAdd(rule *Rule) error { // RuleAdd adds a rule to the system. // Equivalent to: ip rule add func (h *Handle) RuleAdd(rule *Rule) error { - req := h.newNetlinkRequest(syscall.RTM_NEWRULE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_NEWRULE, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) return ruleHandle(rule, req) } @@ -30,15 +33,34 @@ func RuleDel(rule *Rule) error { // RuleDel deletes a rule from the system. // Equivalent to: ip rule del func (h *Handle) RuleDel(rule *Rule) error { - req := h.newNetlinkRequest(syscall.RTM_DELRULE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(unix.RTM_DELRULE, unix.NLM_F_ACK) return ruleHandle(rule, req) } func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { msg := nl.NewRtMsg() - msg.Family = syscall.AF_INET - var dstFamily uint8 + msg.Family = unix.AF_INET + msg.Protocol = unix.RTPROT_BOOT + msg.Scope = unix.RT_SCOPE_UNIVERSE + msg.Table = unix.RT_TABLE_UNSPEC + msg.Type = unix.RTN_UNSPEC + if req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 { + msg.Type = unix.RTN_UNICAST + } + if rule.Invert { + msg.Flags |= FibRuleInvert + } + if rule.Family != 0 { + msg.Family = uint8(rule.Family) + } + if rule.Table >= 0 && rule.Table < 256 { + msg.Table = uint8(rule.Table) + } + if rule.Tos != 0 { + msg.Tos = uint8(rule.Tos) + } + var dstFamily uint8 var rtAttrs []*nl.RtAttr if rule.Dst != nil && rule.Dst.IP != nil { dstLen, _ := rule.Dst.Mask.Size() @@ -46,12 +68,12 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { msg.Family = uint8(nl.GetIPFamily(rule.Dst.IP)) dstFamily = msg.Family var dstData []byte - if msg.Family == syscall.AF_INET { + if msg.Family == unix.AF_INET { dstData = rule.Dst.IP.To4() } else { dstData = rule.Dst.IP.To16() } - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, dstData)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, dstData)) } if rule.Src != nil && rule.Src.IP != nil { @@ -62,19 +84,12 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { srcLen, _ := rule.Src.Mask.Size() msg.Src_len = uint8(srcLen) var srcData []byte - if msg.Family == syscall.AF_INET { + if msg.Family == unix.AF_INET { srcData = rule.Src.IP.To4() } else { srcData = rule.Src.IP.To16() } - rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_SRC, srcData)) - } - - if rule.Table >= 0 { - msg.Table = uint8(rule.Table) - if rule.Table >= 256 { - msg.Table = syscall.RT_TABLE_UNSPEC - } + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_SRC, srcData)) } req.AddData(msg) @@ -82,8 +97,6 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { req.AddData(rtAttrs[i]) } - native := nl.NativeEndian() - if rule.Priority >= 0 { b := make([]byte, 4) native.PutUint32(b, uint32(rule.Priority)) @@ -127,19 +140,35 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { } } if rule.IifName != "" { - req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName))) + req.AddData(nl.NewRtAttr(nl.FRA_IIFNAME, []byte(rule.IifName+"\x00"))) } if rule.OifName != "" { - req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName))) + req.AddData(nl.NewRtAttr(nl.FRA_OIFNAME, []byte(rule.OifName+"\x00"))) } if rule.Goto >= 0 { - msg.Type = nl.FR_ACT_NOP + msg.Type = nl.FR_ACT_GOTO b := make([]byte, 4) native.PutUint32(b, uint32(rule.Goto)) req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b)) } - _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + if rule.IPProto > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.IPProto)) + req.AddData(nl.NewRtAttr(nl.FRA_IP_PROTO, b)) + } + + if rule.Dport != nil { + b := rule.Dport.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_DPORT_RANGE, b)) + } + + if rule.Sport != nil { + b := rule.Sport.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b)) + } + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -152,16 +181,28 @@ func RuleList(family int) ([]Rule, error) { // RuleList lists rules in the system. // Equivalent to: ip rule list func (h *Handle) RuleList(family int) ([]Rule, error) { - req := h.newNetlinkRequest(syscall.RTM_GETRULE, syscall.NLM_F_DUMP|syscall.NLM_F_REQUEST) + return h.RuleListFiltered(family, nil, 0) +} + +// RuleListFiltered gets a list of rules in the system filtered by the +// specified rule template `filter`. +// Equivalent to: ip rule list +func RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) { + return pkgHandle.RuleListFiltered(family, filter, filterMask) +} + +// RuleListFiltered lists rules in the system. +// Equivalent to: ip rule list +func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ([]Rule, error) { + req := h.newNetlinkRequest(unix.RTM_GETRULE, unix.NLM_F_DUMP|unix.NLM_F_REQUEST) msg := nl.NewIfInfomsg(family) req.AddData(msg) - msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWRULE) + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWRULE) if err != nil { return nil, err } - native := nl.NativeEndian() var res = make([]Rule, 0) for i := range msgs { msg := nl.DeserializeRtMsg(msgs[i]) @@ -172,9 +213,12 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { rule := NewRule() + rule.Invert = msg.Flags&FibRuleInvert > 0 + rule.Tos = uint(msg.Tos) + for j := range attrs { switch attrs[j].Attr.Type { - case syscall.RTA_TABLE: + case unix.RTA_TABLE: rule.Table = int(native.Uint32(attrs[j].Value[0:4])) case nl.FRA_SRC: rule.Src = &net.IPNet{ @@ -191,7 +235,7 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { case nl.FRA_FWMASK: rule.Mask = int(native.Uint32(attrs[j].Value[0:4])) case nl.FRA_TUN_ID: - rule.TunID = uint(native.Uint64(attrs[j].Value[0:4])) + rule.TunID = uint(native.Uint64(attrs[j].Value[0:8])) case nl.FRA_IIFNAME: rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1]) case nl.FRA_OIFNAME: @@ -212,10 +256,46 @@ func (h *Handle) RuleList(family int) ([]Rule, error) { rule.Goto = int(native.Uint32(attrs[j].Value[0:4])) case nl.FRA_PRIORITY: rule.Priority = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_IP_PROTO: + rule.IPProto = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_DPORT_RANGE: + rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + case nl.FRA_SPORT_RANGE: + rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + } + } + + if filter != nil { + switch { + case filterMask&RT_FILTER_SRC != 0 && + (rule.Src == nil || rule.Src.String() != filter.Src.String()): + continue + case filterMask&RT_FILTER_DST != 0 && + (rule.Dst == nil || rule.Dst.String() != filter.Dst.String()): + continue + case filterMask&RT_FILTER_TABLE != 0 && + filter.Table != unix.RT_TABLE_UNSPEC && rule.Table != filter.Table: + continue + case filterMask&RT_FILTER_TOS != 0 && rule.Tos != filter.Tos: + continue + case filterMask&RT_FILTER_PRIORITY != 0 && rule.Priority != filter.Priority: + continue + case filterMask&RT_FILTER_MARK != 0 && rule.Mark != filter.Mark: + continue + case filterMask&RT_FILTER_MASK != 0 && rule.Mask != filter.Mask: + continue } } + res = append(res, *rule) } return res, nil } + +func (pr *RulePortRange) toRtAttrData() []byte { + b := [][]byte{make([]byte, 2), make([]byte, 2)} + native.PutUint16(b[0], pr.Start) + native.PutUint16(b[1], pr.End) + return bytes.Join(b, []byte{}) +} diff --git a/vendor/github.com/vishvananda/netlink/socket_linux.go b/vendor/github.com/vishvananda/netlink/socket_linux.go index b42b84f0cf..b881fe496d 100644 --- a/vendor/github.com/vishvananda/netlink/socket_linux.go +++ b/vendor/github.com/vishvananda/netlink/socket_linux.go @@ -7,6 +7,7 @@ import ( "syscall" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) const ( @@ -49,10 +50,15 @@ func (r *socketRequest) Serialize() []byte { native.PutUint32(b.Next(4), r.States) networkOrder.PutUint16(b.Next(2), r.ID.SourcePort) networkOrder.PutUint16(b.Next(2), r.ID.DestinationPort) - copy(b.Next(4), r.ID.Source.To4()) - b.Next(12) - copy(b.Next(4), r.ID.Destination.To4()) - b.Next(12) + if r.Family == unix.AF_INET6 { + copy(b.Next(16), r.ID.Source) + copy(b.Next(16), r.ID.Destination) + } else { + copy(b.Next(4), r.ID.Source.To4()) + b.Next(12) + copy(b.Next(4), r.ID.Destination.To4()) + b.Next(12) + } native.PutUint32(b.Next(4), r.ID.Interface) native.PutUint32(b.Next(4), r.ID.Cookie[0]) native.PutUint32(b.Next(4), r.ID.Cookie[1]) @@ -89,10 +95,15 @@ func (s *Socket) deserialize(b []byte) error { s.Retrans = rb.Read() s.ID.SourcePort = networkOrder.Uint16(rb.Next(2)) s.ID.DestinationPort = networkOrder.Uint16(rb.Next(2)) - s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) - rb.Next(12) - s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) - rb.Next(12) + if s.Family == unix.AF_INET6 { + s.ID.Source = net.IP(rb.Next(16)) + s.ID.Destination = net.IP(rb.Next(16)) + } else { + s.ID.Source = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) + rb.Next(12) + s.ID.Destination = net.IPv4(rb.Read(), rb.Read(), rb.Read(), rb.Read()) + rb.Next(12) + } s.ID.Interface = native.Uint32(rb.Next(4)) s.ID.Cookie[0] = native.Uint32(rb.Next(4)) s.ID.Cookie[1] = native.Uint32(rb.Next(4)) @@ -123,15 +134,15 @@ func SocketGet(local, remote net.Addr) (*Socket, error) { return nil, ErrNotImplemented } - s, err := nl.Subscribe(syscall.NETLINK_INET_DIAG) + s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) if err != nil { return nil, err } defer s.Close() req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0) req.AddData(&socketRequest{ - Family: syscall.AF_INET, - Protocol: syscall.IPPROTO_TCP, + Family: unix.AF_INET, + Protocol: unix.IPPROTO_TCP, ID: SocketID{ SourcePort: uint16(localTCP.Port), DestinationPort: uint16(remoteTCP.Port), @@ -141,10 +152,13 @@ func SocketGet(local, remote net.Addr) (*Socket, error) { }, }) s.Send(req) - msgs, err := s.Receive() + msgs, from, err := s.Receive() if err != nil { return nil, err } + if from.Pid != nl.PidKernel { + return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + } if len(msgs) == 0 { return nil, errors.New("no message nor error from netlink") } @@ -157,3 +171,121 @@ func SocketGet(local, remote net.Addr) (*Socket, error) { } return sock, nil } + +// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. +func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { + var result []*InetDiagTCPInfoResp + err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error { + sockInfo := &Socket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:]) + if err != nil { + return err + } + + res, err := attrsToInetDiagTCPInfoResp(attrs, sockInfo) + if err != nil { + return err + } + + result = append(result, res) + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} + +// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. +func SocketDiagTCP(family uint8) ([]*Socket, error) { + var result []*Socket + err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error { + sockInfo := &Socket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + result = append(result, sockInfo) + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} + +// socketDiagTCPExecutor requests INET_DIAG_INFO for TCP protocol for specified family type. +func socketDiagTCPExecutor(family uint8, receiver func(syscall.NetlinkMessage) error) error { + s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) + if err != nil { + return err + } + defer s.Close() + + req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) + req.AddData(&socketRequest{ + Family: family, + Protocol: unix.IPPROTO_TCP, + Ext: (1 << (INET_DIAG_VEGASINFO - 1)) | (1 << (INET_DIAG_INFO - 1)), + States: uint32(0xfff), // All TCP states + }) + s.Send(req) + +loop: + for { + msgs, from, err := s.Receive() + if err != nil { + return err + } + if from.Pid != nl.PidKernel { + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + } + if len(msgs) == 0 { + return errors.New("no message nor error from netlink") + } + + for _, m := range msgs { + switch m.Header.Type { + case unix.NLMSG_DONE: + break loop + case unix.NLMSG_ERROR: + error := int32(native.Uint32(m.Data[0:4])) + return syscall.Errno(-error) + } + if err := receiver(m); err != nil { + return err + } + } + } + return nil +} + +func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) { + var tcpInfo *TCPInfo + var tcpBBRInfo *TCPBBRInfo + for _, a := range attrs { + if a.Attr.Type == INET_DIAG_INFO { + tcpInfo = &TCPInfo{} + if err := tcpInfo.deserialize(a.Value); err != nil { + return nil, err + } + continue + } + + if a.Attr.Type == INET_DIAG_BBRINFO { + tcpBBRInfo = &TCPBBRInfo{} + if err := tcpBBRInfo.deserialize(a.Value); err != nil { + return nil, err + } + continue + } + } + + return &InetDiagTCPInfoResp{ + InetDiagMsg: sockInfo, + TCPInfo: tcpInfo, + TCPBBRInfo: tcpBBRInfo, + }, nil +} diff --git a/vendor/github.com/vishvananda/netlink/tcp.go b/vendor/github.com/vishvananda/netlink/tcp.go new file mode 100644 index 0000000000..23ca014d43 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/tcp.go @@ -0,0 +1,84 @@ +package netlink + +// TCP States +const ( + TCP_ESTABLISHED = iota + 0x01 + TCP_SYN_SENT + TCP_SYN_RECV + TCP_FIN_WAIT1 + TCP_FIN_WAIT2 + TCP_TIME_WAIT + TCP_CLOSE + TCP_CLOSE_WAIT + TCP_LAST_ACK + TCP_LISTEN + TCP_CLOSING + TCP_NEW_SYN_REC + TCP_MAX_STATES +) + +type TCPInfo struct { + State uint8 + Ca_state uint8 + Retransmits uint8 + Probes uint8 + Backoff uint8 + Options uint8 + Snd_wscale uint8 // no uint4 + Rcv_wscale uint8 + Delivery_rate_app_limited uint8 + Fastopen_client_fail uint8 + Rto uint32 + Ato uint32 + Snd_mss uint32 + Rcv_mss uint32 + Unacked uint32 + Sacked uint32 + Lost uint32 + Retrans uint32 + Fackets uint32 + Last_data_sent uint32 + Last_ack_sent uint32 + Last_data_recv uint32 + Last_ack_recv uint32 + Pmtu uint32 + Rcv_ssthresh uint32 + Rtt uint32 + Rttvar uint32 + Snd_ssthresh uint32 + Snd_cwnd uint32 + Advmss uint32 + Reordering uint32 + Rcv_rtt uint32 + Rcv_space uint32 + Total_retrans uint32 + Pacing_rate uint64 + Max_pacing_rate uint64 + Bytes_acked uint64 /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + Bytes_received uint64 /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + Segs_out uint32 /* RFC4898 tcpEStatsPerfSegsOut */ + Segs_in uint32 /* RFC4898 tcpEStatsPerfSegsIn */ + Notsent_bytes uint32 + Min_rtt uint32 + Data_segs_in uint32 /* RFC4898 tcpEStatsDataSegsIn */ + Data_segs_out uint32 /* RFC4898 tcpEStatsDataSegsOut */ + Delivery_rate uint64 + Busy_time uint64 /* Time (usec) busy sending data */ + Rwnd_limited uint64 /* Time (usec) limited by receive window */ + Sndbuf_limited uint64 /* Time (usec) limited by send buffer */ + Delivered uint32 + Delivered_ce uint32 + Bytes_sent uint64 /* RFC4898 tcpEStatsPerfHCDataOctetsOut */ + Bytes_retrans uint64 /* RFC4898 tcpEStatsPerfOctetsRetrans */ + Dsack_dups uint32 /* RFC4898 tcpEStatsStackDSACKDups */ + Reord_seen uint32 /* reordering events seen */ + Rcv_ooopack uint32 /* Out-of-order packets received */ + Snd_wnd uint32 /* peer's advertised receive window after * scaling (bytes) */ +} + +type TCPBBRInfo struct { + BBRBW uint64 + BBRMinRTT uint32 + BBRPacingGain uint32 + BBRCwndGain uint32 +} diff --git a/vendor/github.com/vishvananda/netlink/tcp_linux.go b/vendor/github.com/vishvananda/netlink/tcp_linux.go new file mode 100644 index 0000000000..293858738d --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/tcp_linux.go @@ -0,0 +1,353 @@ +package netlink + +import ( + "bytes" + "errors" + "io" +) + +const ( + tcpBBRInfoLen = 20 +) + +func checkDeserErr(err error) error { + if err == io.EOF { + return nil + } + return err +} + +func (t *TCPInfo) deserialize(b []byte) error { + var err error + rb := bytes.NewBuffer(b) + + t.State, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Ca_state, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Retransmits, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Probes, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + t.Backoff, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Options, err = rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + + scales, err := rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Snd_wscale = scales >> 4 // first 4 bits + t.Rcv_wscale = scales & 0xf // last 4 bits + + rateLimAndFastOpen, err := rb.ReadByte() + if err != nil { + return checkDeserErr(err) + } + t.Delivery_rate_app_limited = rateLimAndFastOpen >> 7 // get first bit + t.Fastopen_client_fail = rateLimAndFastOpen >> 5 & 3 // get next two bits + + next := rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rto = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Ato = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_mss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_mss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Unacked = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Sacked = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Lost = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Retrans = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Fackets = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_data_sent = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_ack_sent = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_data_recv = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Last_ack_recv = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Pmtu = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_ssthresh = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rtt = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rttvar = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_ssthresh = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_cwnd = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Advmss = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Reordering = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_rtt = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_space = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Total_retrans = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Pacing_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Max_pacing_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_acked = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_received = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Segs_out = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Segs_in = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Notsent_bytes = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Min_rtt = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Data_segs_in = native.Uint32(next) + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Data_segs_out = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Delivery_rate = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Busy_time = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Rwnd_limited = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Sndbuf_limited = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Delivered = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Delivered_ce = native.Uint32(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_sent = native.Uint64(next) + + next = rb.Next(8) + if len(next) == 0 { + return nil + } + t.Bytes_retrans = native.Uint64(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Dsack_dups = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Reord_seen = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Rcv_ooopack = native.Uint32(next) + + next = rb.Next(4) + if len(next) == 0 { + return nil + } + t.Snd_wnd = native.Uint32(next) + return nil +} + +func (t *TCPBBRInfo) deserialize(b []byte) error { + if len(b) != tcpBBRInfoLen { + return errors.New("Invalid length") + } + + rb := bytes.NewBuffer(b) + t.BBRBW = native.Uint64(rb.Next(8)) + t.BBRMinRTT = native.Uint32(rb.Next(4)) + t.BBRPacingGain = native.Uint32(rb.Next(4)) + t.BBRCwndGain = native.Uint32(rb.Next(4)) + + return nil +} diff --git a/vendor/github.com/vishvananda/netlink/xfrm.go b/vendor/github.com/vishvananda/netlink/xfrm.go index 9962dcf700..02b41842e1 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm.go +++ b/vendor/github.com/vishvananda/netlink/xfrm.go @@ -2,19 +2,20 @@ package netlink import ( "fmt" - "syscall" + + "golang.org/x/sys/unix" ) // Proto is an enum representing an ipsec protocol. type Proto uint8 const ( - XFRM_PROTO_ROUTE2 Proto = syscall.IPPROTO_ROUTING - XFRM_PROTO_ESP Proto = syscall.IPPROTO_ESP - XFRM_PROTO_AH Proto = syscall.IPPROTO_AH - XFRM_PROTO_HAO Proto = syscall.IPPROTO_DSTOPTS + XFRM_PROTO_ROUTE2 Proto = unix.IPPROTO_ROUTING + XFRM_PROTO_ESP Proto = unix.IPPROTO_ESP + XFRM_PROTO_AH Proto = unix.IPPROTO_AH + XFRM_PROTO_HAO Proto = unix.IPPROTO_DSTOPTS XFRM_PROTO_COMP Proto = 0x6c // NOTE not defined on darwin - XFRM_PROTO_IPSEC_ANY Proto = syscall.IPPROTO_RAW + XFRM_PROTO_IPSEC_ANY Proto = unix.IPPROTO_RAW ) func (p Proto) String() string { diff --git a/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go index 7b98c9cb6d..985d3a915f 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_monitor_linux.go @@ -2,11 +2,10 @@ package netlink import ( "fmt" - "syscall" - - "github.com/vishvananda/netns" "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" ) type XfrmMsg interface { @@ -39,7 +38,7 @@ func XfrmMonitor(ch chan<- XfrmMsg, done <-chan struct{}, errorChan chan<- error if err != nil { return nil } - s, err := nl.SubscribeAt(netns.None(), netns.None(), syscall.NETLINK_XFRM, groups...) + s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_XFRM, groups...) if err != nil { return err } @@ -55,11 +54,15 @@ func XfrmMonitor(ch chan<- XfrmMsg, done <-chan struct{}, errorChan chan<- error go func() { defer close(ch) for { - msgs, err := s.Receive() + msgs, from, err := s.Receive() if err != nil { errorChan <- err return } + if from.Pid != nl.PidKernel { + errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return + } for _, m := range msgs { switch m.Header.Type { case nl.XFRM_MSG_EXPIRE: diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy.go b/vendor/github.com/vishvananda/netlink/xfrm_policy.go index c97ec43a25..b7532b092c 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_policy.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy.go @@ -35,16 +35,36 @@ func (d Dir) String() string { return fmt.Sprintf("socket %d", d-XFRM_SOCKET_IN) } +// PolicyAction is an enum representing an ipsec policy action. +type PolicyAction uint8 + +const ( + XFRM_POLICY_ALLOW PolicyAction = 0 + XFRM_POLICY_BLOCK PolicyAction = 1 +) + +func (a PolicyAction) String() string { + switch a { + case XFRM_POLICY_ALLOW: + return "allow" + case XFRM_POLICY_BLOCK: + return "block" + default: + return fmt.Sprintf("action %d", a) + } +} + // XfrmPolicyTmpl encapsulates a rule for the base addresses of an ipsec // policy. These rules are matched with XfrmState to determine encryption // and authentication algorithms. type XfrmPolicyTmpl struct { - Dst net.IP - Src net.IP - Proto Proto - Mode Mode - Spi int - Reqid int + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + Optional int } func (t XfrmPolicyTmpl) String() string { @@ -64,11 +84,14 @@ type XfrmPolicy struct { Dir Dir Priority int Index int + Action PolicyAction + Ifindex int + Ifid int Mark *XfrmMark Tmpls []XfrmPolicyTmpl } func (p XfrmPolicy) String() string { - return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Mark: %s, Tmpls: %s}", - p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Mark, p.Tmpls) + return fmt.Sprintf("{Dst: %v, Src: %v, Proto: %s, DstPort: %d, SrcPort: %d, Dir: %s, Priority: %d, Index: %d, Action: %s, Ifindex: %d, Ifid: %d, Mark: %s, Tmpls: %s}", + p.Dst, p.Src, p.Proto, p.DstPort, p.SrcPort, p.Dir, p.Priority, p.Index, p.Action, p.Ifindex, p.Ifid, p.Mark, p.Tmpls) } diff --git a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go index c3d4e42227..3584968041 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go @@ -1,9 +1,8 @@ package netlink import ( - "syscall" - "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) { @@ -28,6 +27,7 @@ func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) { if sel.Sport != 0 { sel.SportMask = ^uint16(0) } + sel.Ifindex = int32(policy.Ifindex) } // XfrmPolicyAdd will add an xfrm policy to the system. @@ -55,13 +55,14 @@ func (h *Handle) XfrmPolicyUpdate(policy *XfrmPolicy) error { } func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { - req := h.newNetlinkRequest(nlProto, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) msg := &nl.XfrmUserpolicyInfo{} selFromPolicy(&msg.Sel, policy) msg.Priority = uint32(policy.Priority) msg.Index = uint32(policy.Index) msg.Dir = uint8(policy.Dir) + msg.Action = uint8(policy.Action) msg.Lft.SoftByteLimit = nl.XFRM_INF msg.Lft.HardByteLimit = nl.XFRM_INF msg.Lft.SoftPacketLimit = nl.XFRM_INF @@ -78,6 +79,7 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi)) userTmpl.Mode = uint8(tmpl.Mode) userTmpl.Reqid = uint32(tmpl.Reqid) + userTmpl.Optional = uint8(tmpl.Optional) userTmpl.Aalgos = ^uint32(0) userTmpl.Ealgos = ^uint32(0) userTmpl.Calgos = ^uint32(0) @@ -91,7 +93,12 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { req.AddData(out) } - _, err := req.Execute(syscall.NETLINK_XFRM, 0) + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } + + _, err := req.Execute(unix.NETLINK_XFRM, 0) return err } @@ -121,12 +128,12 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) { // Equivalent to: `ip xfrm policy show`. // The list can be filtered by ip family. func (h *Handle) XfrmPolicyList(family int) ([]XfrmPolicy, error) { - req := h.newNetlinkRequest(nl.XFRM_MSG_GETPOLICY, syscall.NLM_F_DUMP) + req := h.newNetlinkRequest(nl.XFRM_MSG_GETPOLICY, unix.NLM_F_DUMP) msg := nl.NewIfInfomsg(family) req.AddData(msg) - msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY) + msgs, err := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY) if err != nil { return nil, err } @@ -165,13 +172,13 @@ func XfrmPolicyFlush() error { // XfrmPolicyFlush will flush the policies on the system. // Equivalent to: `ip xfrm policy flush` func (h *Handle) XfrmPolicyFlush() error { - req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHPOLICY, syscall.NLM_F_ACK) - _, err := req.Execute(syscall.NETLINK_XFRM, 0) + req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHPOLICY, unix.NLM_F_ACK) + _, err := req.Execute(unix.NETLINK_XFRM, 0) return err } func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPolicy, error) { - req := h.newNetlinkRequest(nlProto, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(nlProto, unix.NLM_F_ACK) msg := &nl.XfrmUserpolicyId{} selFromPolicy(&msg.Sel, policy) @@ -184,12 +191,17 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo req.AddData(out) } + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } + resType := nl.XFRM_MSG_NEWPOLICY if nlProto == nl.XFRM_MSG_DELPOLICY { resType = 0 } - msgs, err := req.Execute(syscall.NETLINK_XFRM, uint16(resType)) + msgs, err := req.Execute(unix.NETLINK_XFRM, uint16(resType)) if err != nil { return nil, err } @@ -198,12 +210,7 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo return nil, err } - p, err := parseXfrmPolicy(msgs[0], FAMILY_ALL) - if err != nil { - return nil, err - } - - return p, nil + return parseXfrmPolicy(msgs[0], FAMILY_ALL) } func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { @@ -221,9 +228,11 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { policy.Proto = Proto(msg.Sel.Proto) policy.DstPort = int(nl.Swap16(msg.Sel.Dport)) policy.SrcPort = int(nl.Swap16(msg.Sel.Sport)) + policy.Ifindex = int(msg.Sel.Ifindex) policy.Priority = int(msg.Priority) policy.Index = int(msg.Index) policy.Dir = Dir(msg.Dir) + policy.Action = PolicyAction(msg.Action) attrs, err := nl.ParseRouteAttr(m[msg.Len():]) if err != nil { @@ -243,6 +252,7 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { resTmpl.Mode = Mode(tmpl.Mode) resTmpl.Spi = int(nl.Swap32(tmpl.XfrmId.Spi)) resTmpl.Reqid = int(tmpl.Reqid) + resTmpl.Optional = int(tmpl.Optional) policy.Tmpls = append(policy.Tmpls, resTmpl) } case nl.XFRMA_MARK: @@ -250,6 +260,8 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { policy.Mark = new(XfrmMark) policy.Mark.Value = mark.Value policy.Mark.Mask = mark.Mask + case nl.XFRMA_IF_ID: + policy.Ifid = int(native.Uint32(attr.Value)) } } diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state.go b/vendor/github.com/vishvananda/netlink/xfrm_state.go index 368a9b986d..19df82c763 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_state.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_state.go @@ -3,6 +3,7 @@ package netlink import ( "fmt" "net" + "time" ) // XfrmStateAlgo represents the algorithm to use for the ipsec encryption. @@ -67,6 +68,19 @@ type XfrmStateLimits struct { TimeUseHard uint64 } +// XfrmStateStats represents the current number of bytes/packets +// processed by this State, the State's installation and first use +// time and the replay window counters. +type XfrmStateStats struct { + ReplayWindow uint32 + Replay uint32 + Failed uint32 + Bytes uint64 + Packets uint64 + AddTime uint64 + UseTime uint64 +} + // XfrmState represents the state of an ipsec policy. It optionally // contains an XfrmStateAlgo for encryption and one for authentication. type XfrmState struct { @@ -78,7 +92,10 @@ type XfrmState struct { Reqid int ReplayWindow int Limits XfrmStateLimits + Statistics XfrmStateStats Mark *XfrmMark + OutputMark *XfrmMark + Ifid int Auth *XfrmStateAlgo Crypt *XfrmStateAlgo Aead *XfrmStateAlgo @@ -87,17 +104,23 @@ type XfrmState struct { } func (sa XfrmState) String() string { - return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t", - sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN) + return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t", + sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN) } func (sa XfrmState) Print(stats bool) string { if !stats { return sa.String() } - - return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d", + at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate) + ut := "-" + if sa.Statistics.UseTime > 0 { + ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate) + } + return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+ + "AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d", sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard), - sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard) + sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut, + sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed) } func printLimit(lmt uint64) string { diff --git a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go index 6a7bc0deca..61a2d2dea2 100644 --- a/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go +++ b/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go @@ -2,10 +2,10 @@ package netlink import ( "fmt" - "syscall" "unsafe" "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) func writeStateAlgo(a *XfrmStateAlgo) []byte { @@ -69,8 +69,10 @@ func writeReplayEsn(replayWindow int) []byte { ReplayWindow: uint32(replayWindow), } - // taken from iproute2/ip/xfrm_state.c: - replayEsn.BmpLen = uint32((replayWindow + (4 * 8) - 1) / (4 * 8)) + // Linux stores the bitmap to identify the already received sequence packets in blocks of uint32 elements. + // Therefore bitmap length is the minimum number of uint32 elements needed. The following is a ceiling operation. + bytesPerElem := int(unsafe.Sizeof(replayEsn.BmpLen)) // Any uint32 variable is good for this + replayEsn.BmpLen = uint32((replayWindow + (bytesPerElem * 8) - 1) / (bytesPerElem * 8)) return replayEsn.Serialize() } @@ -109,9 +111,9 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { // A state with spi 0 can't be deleted so don't allow it to be set if state.Spi == 0 { - return fmt.Errorf("Spi must be set when adding xfrm state.") + return fmt.Errorf("Spi must be set when adding xfrm state") } - req := h.newNetlinkRequest(nlProto, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) msg := xfrmUsersaInfoFromXfrmState(state) @@ -156,14 +158,27 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { out := nl.NewRtAttr(nl.XFRMA_REPLAY_ESN_VAL, writeReplayEsn(state.ReplayWindow)) req.AddData(out) } + if state.OutputMark != nil { + out := nl.NewRtAttr(nl.XFRMA_SET_MARK, nl.Uint32Attr(state.OutputMark.Value)) + req.AddData(out) + if state.OutputMark.Mask != 0 { + out = nl.NewRtAttr(nl.XFRMA_SET_MARK_MASK, nl.Uint32Attr(state.OutputMark.Mask)) + req.AddData(out) + } + } - _, err := req.Execute(syscall.NETLINK_XFRM, 0) + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } + + _, err := req.Execute(unix.NETLINK_XFRM, 0) return err } func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { req := h.newNetlinkRequest(nl.XFRM_MSG_ALLOCSPI, - syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) + unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) msg := &nl.XfrmUserSpiInfo{} msg.XfrmUsersaInfo = *(xfrmUsersaInfoFromXfrmState(state)) @@ -177,17 +192,12 @@ func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { req.AddData(out) } - msgs, err := req.Execute(syscall.NETLINK_XFRM, 0) + msgs, err := req.Execute(unix.NETLINK_XFRM, 0) if err != nil { return nil, err } - s, err := parseXfrmState(msgs[0], FAMILY_ALL) - if err != nil { - return nil, err - } - - return s, err + return parseXfrmState(msgs[0], FAMILY_ALL) } // XfrmStateDel will delete an xfrm state from the system. Note that @@ -216,9 +226,9 @@ func XfrmStateList(family int) ([]XfrmState, error) { // Equivalent to: `ip xfrm state show`. // The list can be filtered by ip family. func (h *Handle) XfrmStateList(family int) ([]XfrmState, error) { - req := h.newNetlinkRequest(nl.XFRM_MSG_GETSA, syscall.NLM_F_DUMP) + req := h.newNetlinkRequest(nl.XFRM_MSG_GETSA, unix.NLM_F_DUMP) - msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWSA) + msgs, err := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWSA) if err != nil { return nil, err } @@ -255,7 +265,7 @@ func (h *Handle) XfrmStateGet(state *XfrmState) (*XfrmState, error) { } func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState, error) { - req := h.newNetlinkRequest(nlProto, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(nlProto, unix.NLM_F_ACK) msg := &nl.XfrmUsersaId{} msg.Family = uint16(nl.GetIPFamily(state.Dst)) @@ -273,12 +283,17 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState req.AddData(out) } + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } + resType := nl.XFRM_MSG_NEWSA if nlProto == nl.XFRM_MSG_DELSA { resType = 0 } - msgs, err := req.Execute(syscall.NETLINK_XFRM, uint16(resType)) + msgs, err := req.Execute(unix.NETLINK_XFRM, uint16(resType)) if err != nil { return nil, err } @@ -308,6 +323,7 @@ func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { state.Reqid = int(msg.Reqid) state.ReplayWindow = int(msg.ReplayWindow) lftToLimits(&msg.Lft, &state.Limits) + curToStats(&msg.Curlft, &msg.Stats, &state.Statistics) return &state } @@ -369,6 +385,21 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) { state.Mark = new(XfrmMark) state.Mark.Value = mark.Value state.Mark.Mask = mark.Mask + case nl.XFRMA_SET_MARK: + if state.OutputMark == nil { + state.OutputMark = new(XfrmMark) + } + state.OutputMark.Value = native.Uint32(attr.Value) + case nl.XFRMA_SET_MARK_MASK: + if state.OutputMark == nil { + state.OutputMark = new(XfrmMark) + } + state.OutputMark.Mask = native.Uint32(attr.Value) + if state.OutputMark.Mask == 0xffffffff { + state.OutputMark.Mask = 0 + } + case nl.XFRMA_IF_ID: + state.Ifid = int(native.Uint32(attr.Value)) } } @@ -386,16 +417,12 @@ func XfrmStateFlush(proto Proto) error { // proto = 0 means any transformation protocols // Equivalent to: `ip xfrm state flush [ proto XFRM-PROTO ]` func (h *Handle) XfrmStateFlush(proto Proto) error { - req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHSA, syscall.NLM_F_ACK) + req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHSA, unix.NLM_F_ACK) req.AddData(&nl.XfrmUsersaFlush{Proto: uint8(proto)}) - _, err := req.Execute(syscall.NETLINK_XFRM, 0) - if err != nil { - return err - } - - return nil + _, err := req.Execute(unix.NETLINK_XFRM, 0) + return err } func limitsToLft(lmts XfrmStateLimits, lft *nl.XfrmLifetimeCfg) { @@ -429,6 +456,16 @@ func lftToLimits(lft *nl.XfrmLifetimeCfg, lmts *XfrmStateLimits) { *lmts = *(*XfrmStateLimits)(unsafe.Pointer(lft)) } +func curToStats(cur *nl.XfrmLifetimeCur, wstats *nl.XfrmStats, stats *XfrmStateStats) { + stats.Bytes = cur.Bytes + stats.Packets = cur.Packets + stats.AddTime = cur.AddTime + stats.UseTime = cur.UseTime + stats.ReplayWindow = wstats.ReplayWindow + stats.Replay = wstats.Replay + stats.Failed = wstats.IntegrityFailed +} + func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo { msg := &nl.XfrmUsersaInfo{} msg.Family = uint16(nl.GetIPFamily(state.Dst)) diff --git a/vendor/github.com/vishvananda/netns/README.md b/vendor/github.com/vishvananda/netns/README.md index 66a5f7258b..6b45cfb892 100644 --- a/vendor/github.com/vishvananda/netns/README.md +++ b/vendor/github.com/vishvananda/netns/README.md @@ -37,7 +37,6 @@ func main() { // Create a new network namespace newns, _ := netns.New() - netns.Set(newns) defer newns.Close() // Do something with the network namespace diff --git a/vendor/github.com/vishvananda/netns/netns.go b/vendor/github.com/vishvananda/netns/netns.go index dd2f21570a..116befd548 100644 --- a/vendor/github.com/vishvananda/netns/netns.go +++ b/vendor/github.com/vishvananda/netns/netns.go @@ -10,7 +10,8 @@ package netns import ( "fmt" - "syscall" + + "golang.org/x/sys/unix" ) // NsHandle is a handle to a network namespace. It can be cast directly @@ -24,11 +25,11 @@ func (ns NsHandle) Equal(other NsHandle) bool { if ns == other { return true } - var s1, s2 syscall.Stat_t - if err := syscall.Fstat(int(ns), &s1); err != nil { + var s1, s2 unix.Stat_t + if err := unix.Fstat(int(ns), &s1); err != nil { return false } - if err := syscall.Fstat(int(other), &s2); err != nil { + if err := unix.Fstat(int(other), &s2); err != nil { return false } return (s1.Dev == s2.Dev) && (s1.Ino == s2.Ino) @@ -36,11 +37,11 @@ func (ns NsHandle) Equal(other NsHandle) bool { // String shows the file descriptor number and its dev and inode. func (ns NsHandle) String() string { - var s syscall.Stat_t if ns == -1 { return "NS(None)" } - if err := syscall.Fstat(int(ns), &s); err != nil { + var s unix.Stat_t + if err := unix.Fstat(int(ns), &s); err != nil { return fmt.Sprintf("NS(%d: unknown)", ns) } return fmt.Sprintf("NS(%d: %d, %d)", ns, s.Dev, s.Ino) @@ -49,11 +50,11 @@ func (ns NsHandle) String() string { // UniqueId returns a string which uniquely identifies the namespace // associated with the network handle. func (ns NsHandle) UniqueId() string { - var s syscall.Stat_t if ns == -1 { return "NS(none)" } - if err := syscall.Fstat(int(ns), &s); err != nil { + var s unix.Stat_t + if err := unix.Fstat(int(ns), &s); err != nil { return "NS(unknown)" } return fmt.Sprintf("NS(%d:%d)", s.Dev, s.Ino) @@ -67,7 +68,7 @@ func (ns NsHandle) IsOpen() bool { // Close closes the NsHandle and resets its file descriptor to -1. // It is not safe to use an NsHandle after Close() is called. func (ns *NsHandle) Close() error { - if err := syscall.Close(int(*ns)); err != nil { + if err := unix.Close(int(*ns)); err != nil { return err } (*ns) = -1 diff --git a/vendor/github.com/vishvananda/netns/netns_linux.go b/vendor/github.com/vishvananda/netns/netns_linux.go index a267c71059..c76acd0870 100644 --- a/vendor/github.com/vishvananda/netns/netns_linux.go +++ b/vendor/github.com/vishvananda/netns/netns_linux.go @@ -6,44 +6,30 @@ import ( "fmt" "io/ioutil" "os" + "path" "path/filepath" - "runtime" "strconv" "strings" "syscall" -) -// SYS_SETNS syscall allows changing the namespace of the current process. -var SYS_SETNS = map[string]uintptr{ - "386": 346, - "amd64": 308, - "arm64": 268, - "arm": 375, - "mips": 4344, - "mipsle": 4344, - "ppc64": 350, - "ppc64le": 350, - "s390x": 339, -}[runtime.GOARCH] + "golang.org/x/sys/unix" +) // Deprecated: use syscall pkg instead (go >= 1.5 needed). const ( - CLONE_NEWUTS = 0x04000000 /* New utsname group? */ - CLONE_NEWIPC = 0x08000000 /* New ipcs */ - CLONE_NEWUSER = 0x10000000 /* New user namespace */ - CLONE_NEWPID = 0x20000000 /* New pid namespace */ - CLONE_NEWNET = 0x40000000 /* New network namespace */ - CLONE_IO = 0x80000000 /* Get io context */ + CLONE_NEWUTS = 0x04000000 /* New utsname group? */ + CLONE_NEWIPC = 0x08000000 /* New ipcs */ + CLONE_NEWUSER = 0x10000000 /* New user namespace */ + CLONE_NEWPID = 0x20000000 /* New pid namespace */ + CLONE_NEWNET = 0x40000000 /* New network namespace */ + CLONE_IO = 0x80000000 /* Get io context */ + bindMountPath = "/run/netns" /* Bind mount path for named netns */ ) // Setns sets namespace using syscall. Note that this should be a method // in syscall but it has not been added. func Setns(ns NsHandle, nstype int) (err error) { - _, _, e1 := syscall.Syscall(SYS_SETNS, uintptr(ns), uintptr(nstype), 0) - if e1 != 0 { - err = e1 - } - return + return unix.Setns(int(ns), nstype) } // Set sets the current network namespace to the namespace represented @@ -52,23 +38,67 @@ func Set(ns NsHandle) (err error) { return Setns(ns, CLONE_NEWNET) } -// New creates a new network namespace and returns a handle to it. +// New creates a new network namespace, sets it as current and returns +// a handle to it. func New() (ns NsHandle, err error) { - if err := syscall.Unshare(CLONE_NEWNET); err != nil { + if err := unix.Unshare(CLONE_NEWNET); err != nil { return -1, err } return Get() } +// NewNamed creates a new named network namespace and returns a handle to it +func NewNamed(name string) (NsHandle, error) { + if _, err := os.Stat(bindMountPath); os.IsNotExist(err) { + err = os.MkdirAll(bindMountPath, 0755) + if err != nil { + return None(), err + } + } + + newNs, err := New() + if err != nil { + return None(), err + } + + namedPath := path.Join(bindMountPath, name) + + f, err := os.OpenFile(namedPath, os.O_CREATE|os.O_EXCL, 0444) + if err != nil { + return None(), err + } + f.Close() + + nsPath := fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), syscall.Gettid()) + err = syscall.Mount(nsPath, namedPath, "bind", syscall.MS_BIND, "") + if err != nil { + return None(), err + } + + return newNs, nil +} + +// DeleteNamed deletes a named network namespace +func DeleteNamed(name string) error { + namedPath := path.Join(bindMountPath, name) + + err := syscall.Unmount(namedPath, syscall.MNT_DETACH) + if err != nil { + return err + } + + return os.Remove(namedPath) +} + // Get gets a handle to the current threads network namespace. func Get() (NsHandle, error) { - return GetFromThread(os.Getpid(), syscall.Gettid()) + return GetFromThread(os.Getpid(), unix.Gettid()) } // GetFromPath gets a handle to a network namespace // identified by the path func GetFromPath(path string) (NsHandle, error) { - fd, err := syscall.Open(path, syscall.O_RDONLY, 0) + fd, err := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0) if err != nil { return -1, err } @@ -188,6 +218,12 @@ func getPidForContainer(id string) (int, error) { filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", "tasks"), // Even more recent docker versions under cgroup/systemd/docker// filepath.Join(cgroupRoot, "..", "systemd", "docker", id, "tasks"), + // Kubernetes with docker and CNI is even more different + filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, "tasks"), + // Another flavor of containers location in recent kubernetes 1.11+ + filepath.Join(cgroupRoot, cgroupThis, "kubepods.slice", "kubepods-besteffort.slice", "*", "docker-"+id+".scope", "tasks"), + // When runs inside of a container with recent kubernetes 1.11+ + filepath.Join(cgroupRoot, "kubepods.slice", "kubepods-besteffort.slice", "*", "docker-"+id+".scope", "tasks"), } var filename string diff --git a/vendor/modules.txt b/vendor/modules.txt index 3a41a2ad79..9d96389c38 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -33,12 +33,12 @@ github.com/ulikunitz/xz github.com/ulikunitz/xz/internal/hash github.com/ulikunitz/xz/internal/xlog github.com/ulikunitz/xz/lzma -# github.com/vishvananda/netlink v0.0.0-20170808154308-f5a6f697a596 -## explicit +# github.com/vishvananda/netlink v1.2.1-beta.2 +## explicit; go 1.12 github.com/vishvananda/netlink github.com/vishvananda/netlink/nl -# github.com/vishvananda/netns v0.0.0-20170707011535-86bef332bfc3 -## explicit +# github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae +## explicit; go 1.12 github.com/vishvananda/netns # github.com/vmware/vmw-guestinfo v0.0.0-20220317130741-510905f0efa3 ## explicit; go 1.12