From 2e6784b0073407c67cd3a946c8dc2a47a945c11a Mon Sep 17 00:00:00 2001 From: justinsb Date: Tue, 12 Nov 2024 09:22:03 -0500 Subject: [PATCH] metal: simple IPAM for IPv6 --- cmd/kops-controller/controllers/gceipam.go | 2 +- cmd/kops-controller/controllers/metalipam.go | 103 ++++++++++++++++++ cmd/kops-controller/main.go | 6 + cmd/kops/toolbox_enroll.go | 1 + docs/cli/kops_toolbox_enroll.md | 1 + k8s/crds/kops.k8s.io_hosts.yaml | 6 + nodeup/pkg/model/kube_apiserver.go | 58 +++++++++- nodeup/pkg/model/prefix.go | 2 + pkg/apis/kops/v1alpha2/host.go | 3 + .../kops/v1alpha2/zz_generated.deepcopy.go | 7 +- pkg/commands/toolbox_enroll.go | 4 + pkg/kubeconfig/create_kubecfg.go | 16 ++- tests/e2e/scenarios/bare-metal/scenario-ipv6 | 31 ++++-- 13 files changed, 220 insertions(+), 20 deletions(-) create mode 100644 cmd/kops-controller/controllers/metalipam.go diff --git a/cmd/kops-controller/controllers/gceipam.go b/cmd/kops-controller/controllers/gceipam.go index 0a750f4c0b1de..37a8e8c9ad485 100644 --- a/cmd/kops-controller/controllers/gceipam.go +++ b/cmd/kops-controller/controllers/gceipam.go @@ -56,7 +56,7 @@ func NewGCEIPAMReconciler(mgr manager.Manager) (*GCEIPAMReconciler, error) { return r, nil } -// GCEIPAMReconciler observes Node objects, assigning their`PodCIDRs` from the instance's `ExternalIpv6`. +// GCEIPAMReconciler observes Node objects, assigning their `PodCIDRs` from the instance's `ExternalIpv6`. type GCEIPAMReconciler struct { // client is the controller-runtime client client client.Client diff --git a/cmd/kops-controller/controllers/metalipam.go b/cmd/kops-controller/controllers/metalipam.go new file mode 100644 index 0000000000000..9f4c55181ffa5 --- /dev/null +++ b/cmd/kops-controller/controllers/metalipam.go @@ -0,0 +1,103 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "fmt" + + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + corev1client "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/klog/v2" + kopsapi "k8s.io/kops/pkg/apis/kops/v1alpha2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// NewMetalIPAMReconciler is the constructor for a MetalIPAMReconciler
+func NewMetalIPAMReconciler(ctx context.Context, mgr manager.Manager) (*MetalIPAMReconciler, error) { + klog.Info("starting metal ipam controller") + r := &MetalIPAMReconciler{ + client: mgr.GetClient(), + log: ctrl.Log.WithName("controllers").WithName("metal_ipam"), + } + + coreClient, err := corev1client.NewForConfig(mgr.GetConfig()) + if err != nil { + return nil, fmt.Errorf("building corev1 client: %w", err) + } + r.coreV1Client = coreClient + + return r, nil +} + +// MetalIPAMReconciler observes Node objects, assigning their `PodCIDRs` from the corresponding `Host` object's `PodCIDRs`. +type MetalIPAMReconciler struct { + // client is the controller-runtime client + client client.Client + + // log is a logr + log logr.Logger + + // coreV1Client is a client-go client for patching nodes + coreV1Client *corev1client.CoreV1Client +} + +// +kubebuilder:rbac:groups=,resources=nodes,verbs=get;list;watch;patch +// Reconcile is the main reconciler function that observes node changes. 
+func (r *MetalIPAMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + node := &corev1.Node{} + if err := r.client.Get(ctx, req.NamespacedName, node); err != nil { + klog.Warningf("unable to fetch node %s: %v", node.Name, err) + if apierrors.IsNotFound(err) { + // we'll ignore not-found errors, since they can't be fixed by an immediate + // requeue (we'll need to wait for a new notification), and we can get them + // on deleted requests. + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + + host := &kopsapi.Host{} + id := types.NamespacedName{ + Namespace: "kops-system", + Name: node.Name, + } + if err := r.client.Get(ctx, id, host); err != nil { + klog.Warningf("unable to fetch host %s: %v", id, err) + return ctrl.Result{}, err + } + + if len(node.Spec.PodCIDRs) == 0 { + if err := patchNodePodCIDRs(r.coreV1Client, ctx, node, host.Spec.PodCIDRs); err != nil { + return ctrl.Result{}, err + } + } + + return ctrl.Result{}, nil +} + +func (r *MetalIPAMReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + Named("metal_ipam"). + For(&corev1.Node{}). 
+ Complete(r) +} diff --git a/cmd/kops-controller/main.go b/cmd/kops-controller/main.go index 1ac9d498dacc3..f722e6aabac45 100644 --- a/cmd/kops-controller/main.go +++ b/cmd/kops-controller/main.go @@ -392,6 +392,12 @@ func setupCloudIPAM(ctx context.Context, mgr manager.Manager, opt *config.Option return fmt.Errorf("creating gce IPAM controller: %w", err) } controller = ipamController + case "metal": + ipamController, err := controllers.NewMetalIPAMReconciler(ctx, mgr) + if err != nil { + return fmt.Errorf("creating metal IPAM controller: %w", err) + } + controller = ipamController default: return fmt.Errorf("kOps IPAM controller is not supported on cloud %q", opt.Cloud) } diff --git a/cmd/kops/toolbox_enroll.go b/cmd/kops/toolbox_enroll.go index 55d3600655be0..e5eb7067aeb4d 100644 --- a/cmd/kops/toolbox_enroll.go +++ b/cmd/kops/toolbox_enroll.go @@ -45,6 +45,7 @@ func NewCmdToolboxEnroll(f commandutils.Factory, out io.Writer) *cobra.Command { cmd.Flags().StringVar(&options.ClusterName, "cluster", options.ClusterName, "Name of cluster to join") cmd.Flags().StringVar(&options.InstanceGroup, "instance-group", options.InstanceGroup, "Name of instance-group to join") + cmd.Flags().StringSliceVar(&options.PodCIDRs, "pod-cidr", options.PodCIDRs, "IP Address range to use for pods that run on this node") cmd.Flags().StringVar(&options.Host, "host", options.Host, "IP/hostname for machine to add") cmd.Flags().StringVar(&options.SSHUser, "ssh-user", options.SSHUser, "user for ssh") diff --git a/docs/cli/kops_toolbox_enroll.md b/docs/cli/kops_toolbox_enroll.md index 2f68d815ddab3..1d40473486fbf 100644 --- a/docs/cli/kops_toolbox_enroll.md +++ b/docs/cli/kops_toolbox_enroll.md @@ -26,6 +26,7 @@ kops toolbox enroll [CLUSTER] [flags] -h, --help help for enroll --host string IP/hostname for machine to add --instance-group string Name of instance-group to join + --pod-cidr strings IP Address range to use for pods that run on this node --ssh-port int port for ssh (default 22) 
--ssh-user string user for ssh (default "root") ``` diff --git a/k8s/crds/kops.k8s.io_hosts.yaml b/k8s/crds/kops.k8s.io_hosts.yaml index 84ac033ee7191..10d95c221a50b 100644 --- a/k8s/crds/kops.k8s.io_hosts.yaml +++ b/k8s/crds/kops.k8s.io_hosts.yaml @@ -42,6 +42,12 @@ spec: properties: instanceGroup: type: string + podCIDRs: + description: PodCIDRs configures the IP ranges to be used for pods + on this node/host. + items: + type: string + type: array publicKey: type: string type: object diff --git a/nodeup/pkg/model/kube_apiserver.go b/nodeup/pkg/model/kube_apiserver.go index f1636a44b4674..d201272747923 100644 --- a/nodeup/pkg/model/kube_apiserver.go +++ b/nodeup/pkg/model/kube_apiserver.go @@ -19,10 +19,12 @@ package model import ( "context" "fmt" + "net" "path/filepath" "sort" "strings" + "k8s.io/klog/v2" "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/flagbuilder" "k8s.io/kops/pkg/k8scodecs" @@ -77,6 +79,55 @@ func (b *KubeAPIServerBuilder) Build(c *fi.NodeupModelBuilderContext) error { } } + if b.CloudProvider() == kops.CloudProviderMetal { + // Workaround for https://github.com/kubernetes/kubernetes/issues/111671 + if b.IsIPv6Only() { + interfaces, err := net.Interfaces() + if err != nil { + return fmt.Errorf("getting local network interfaces: %w", err) + } + var ipv6s []net.IP + for _, intf := range interfaces { + addresses, err := intf.Addrs() + if err != nil { + return fmt.Errorf("getting addresses for network interface %q: %w", intf.Name, err) + } + for _, addr := range addresses { + ip, _, err := net.ParseCIDR(addr.String()) + if ip == nil { + return fmt.Errorf("parsing ip address %q (bound to network %q): %w", addr.String(), intf.Name, err) + } + if ip.To4() != nil { + // We're only looking for ipv6 + continue + } + if ip.IsLinkLocalUnicast() { + klog.V(4).Infof("ignoring link-local unicast addr %v", addr) + continue + } + if ip.IsLinkLocalMulticast() { + klog.V(4).Infof("ignoring link-local multicast addr %v", addr) + continue + } + if ip.IsLoopback() { + 
klog.V(4).Infof("ignoring loopback addr %v", addr) + continue + } + ipv6s = append(ipv6s, ip) + } + } + if len(ipv6s) > 1 { + klog.Warningf("found multiple ipv6s, choosing first: %v", ipv6s) + } + if len(ipv6s) == 0 { + klog.Warningf("did not find ipv6 address for kube-apiserver --advertise-address") + } + if len(ipv6s) > 0 { + kubeAPIServer.AdvertiseAddress = ipv6s[0].String() + } + } + } + b.configureOIDC(&kubeAPIServer) if err := b.writeAuthenticationConfig(c, &kubeAPIServer); err != nil { return err @@ -697,10 +748,9 @@ func (b *KubeAPIServerBuilder) buildPod(ctx context.Context, kubeAPIServer *kops image := b.RemapImage(kubeAPIServer.Image) container := &v1.Container{ - Name: "kube-apiserver", - Image: image, - Env: append(kubeAPIServer.Env, proxy.GetProxyEnvVars(b.NodeupConfig.Networking.EgressProxy)...), - LivenessProbe: livenessProbe, + Name: "kube-apiserver", + Image: image, + Env: append(kubeAPIServer.Env, proxy.GetProxyEnvVars(b.NodeupConfig.Networking.EgressProxy)...), LivenessProbe: livenessProbe, ReadinessProbe: readinessProbe, StartupProbe: startupProbe, Ports: []v1.ContainerPort{ diff --git a/nodeup/pkg/model/prefix.go b/nodeup/pkg/model/prefix.go index de796341cfc72..0381b44672d41 100644 --- a/nodeup/pkg/model/prefix.go +++ b/nodeup/pkg/model/prefix.go @@ -41,6 +41,8 @@ func (b *PrefixBuilder) Build(c *fi.NodeupModelBuilderContext) error { }) case kops.CloudProviderGCE: // Prefix is assigned by GCE + case kops.CloudProviderMetal: + // IPv6 must be configured externally (not by nodeup) default: return fmt.Errorf("kOps IPAM controller not supported on cloud %q", b.CloudProvider()) } diff --git a/pkg/apis/kops/v1alpha2/host.go b/pkg/apis/kops/v1alpha2/host.go index 7e1295ca167ec..9aa242d3b62ff 100644 --- a/pkg/apis/kops/v1alpha2/host.go +++ b/pkg/apis/kops/v1alpha2/host.go @@ -36,6 +36,9 @@ type Host struct { type HostSpec struct { PublicKey string `json:"publicKey,omitempty"` InstanceGroup string `json:"instanceGroup,omitempty"` + + // PodCIDRs 
configures the IP ranges to be used for pods on this node/host. + PodCIDRs []string `json:"podCIDRs,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go index 99f85e64b9a70..a4a2966bb3466 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go @@ -2334,7 +2334,7 @@ func (in *Host) DeepCopyInto(out *Host) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) return } @@ -2392,6 +2392,11 @@ func (in *HostList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *HostSpec) DeepCopyInto(out *HostSpec) { *out = *in + if in.PodCIDRs != nil { + in, out := &in.PodCIDRs, &out.PodCIDRs + *out = make([]string, len(*in)) + copy(*out, *in) + } return } diff --git a/pkg/commands/toolbox_enroll.go b/pkg/commands/toolbox_enroll.go index 497995ef64134..c4b725f15ca7e 100644 --- a/pkg/commands/toolbox_enroll.go +++ b/pkg/commands/toolbox_enroll.go @@ -67,6 +67,9 @@ type ToolboxEnrollOptions struct { SSHUser string SSHPort int + + // PodCIDRs is the list of IP Address ranges to use for pods that run on this node + PodCIDRs []string } func (o *ToolboxEnrollOptions) InitDefaults() { @@ -209,6 +212,7 @@ func createHostResourceInAPIServer(ctx context.Context, options *ToolboxEnrollOp host.Name = nodeName host.Spec.InstanceGroup = options.InstanceGroup host.Spec.PublicKey = string(publicKey) + host.Spec.PodCIDRs = options.PodCIDRs if err := client.Create(ctx, host); err != nil { return fmt.Errorf("failed to create host %s/%s: %w", host.Namespace, host.Name, err) diff --git a/pkg/kubeconfig/create_kubecfg.go b/pkg/kubeconfig/create_kubecfg.go index 862a021f38f9f..1e419b9588009 100644 --- 
a/pkg/kubeconfig/create_kubecfg.go +++ b/pkg/kubeconfig/create_kubecfg.go @@ -20,6 +20,7 @@ import ( "context" "crypto/x509/pkix" "fmt" + "net" "os/user" "sort" "time" @@ -41,7 +42,7 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto server = "https://" + cluster.APIInternalName() } else { if cluster.Spec.API.PublicName != "" { - server = "https://" + cluster.Spec.API.PublicName + server = "https://" + wrapIPv6Address(cluster.Spec.API.PublicName) } else { server = "https://api." + clusterName } @@ -82,7 +83,7 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto if len(targets) != 1 { klog.Warningf("Found multiple API endpoints (%v), choosing arbitrarily", targets) } - server = "https://" + targets[0] + server = "https://" + wrapIPv6Address(targets[0]) } } } @@ -171,3 +172,14 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto return b, nil } + +// wrapIPv6Address will wrap IPv6 addresses in square brackets, +// for use in URLs; other endpoints are unchanged. +func wrapIPv6Address(endpoint string) string { + ip := net.ParseIP(endpoint) + // IPv6 addresses are wrapped in square brackets in URLs + if ip != nil && ip.To4() == nil { + return "[" + endpoint + "]" + } + return endpoint +} diff --git a/tests/e2e/scenarios/bare-metal/scenario-ipv6 b/tests/e2e/scenarios/bare-metal/scenario-ipv6 index 29a90f616bcaf..952b49948e91e 100755 --- a/tests/e2e/scenarios/bare-metal/scenario-ipv6 +++ b/tests/e2e/scenarios/bare-metal/scenario-ipv6 @@ -70,18 +70,24 @@ ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519 . hack/dev-build-metal.sh +IPV6_PREFIX=fd00:10:123:45: +IPV4_PREFIX=10.123.45. 
+ echo "Waiting 10 seconds for VMs to start" sleep 10 +VM0_IP=${IPV4_PREFIX}10 +VM1_IP=${IPV4_PREFIX}11 +VM2_IP=${IPV4_PREFIX}12 + # Remove from known-hosts in case of reuse -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.10 || true -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.11 || true -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.12 || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM0_IP} || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM1_IP} || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM2_IP} || true -# Check SSH is working and accept the keys -ssh -o StrictHostKeyChecking=accept-new root@${VM0_IP} uptime -ssh -o StrictHostKeyChecking=accept-new root@${VM1_IP} uptime -ssh -o StrictHostKeyChecking=accept-new root@${VM2_IP} uptime +ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@${VM0_IP} uptime +ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@${VM1_IP} uptime +ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@${VM2_IP} uptime cd ${REPO_ROOT} @@ -206,8 +212,8 @@ ${KOPS} toolbox enroll --cluster ${CLUSTER_NAME} --instance-group control-plane- cat <