From 0fdc2ac5e91ce7fb00070ef860a0e62f76d89819 Mon Sep 17 00:00:00 2001 From: Semir Patel Date: Wed, 22 Nov 2023 10:06:11 -0600 Subject: [PATCH] v2tenancy: namespace deletion using finalizers (#19714) --- .../internal/controllers/common/common.go | 196 ++++++++++++++++++ .../controllers/namespace/controller.go | 95 +++++++++ .../internal/controllers/register_ce.go | 3 +- .../tenancytest/namespace_controller_test.go | 164 +++++++++++++++ 4 files changed, 457 insertions(+), 1 deletion(-) create mode 100644 internal/tenancy/internal/controllers/common/common.go create mode 100644 internal/tenancy/internal/controllers/namespace/controller.go create mode 100644 internal/tenancy/tenancytest/namespace_controller_test.go diff --git a/internal/tenancy/internal/controllers/common/common.go b/internal/tenancy/internal/controllers/common/common.go new file mode 100644 index 000000000000..38b8af862d93 --- /dev/null +++ b/internal/tenancy/internal/controllers/common/common.go @@ -0,0 +1,196 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +// Common code shared by the partition and namespace controllers. +package common + +import ( + "context" + "errors" + "fmt" + + "github.com/hashicorp/consul/internal/controller" + "github.com/hashicorp/consul/internal/resource" + "github.com/hashicorp/consul/proto-public/pbresource" +) + +const ( + // ConditionAccepted indicates the tenancy unit has a finalizer + // and contains a default namespace if a partition. + ConditionAccepted = "accepted" + ReasonAcceptedOK = "Ok" + ReasonEnsureHasFinalizerFailed = "EnsureHasFinalizerFailed" + + // ConditionDeleted indicates that the units tenants have been + // deleted. It never has a state other than false because the + // resource no longer exists at that point. + ConditionDeleted = "deleted" + ReasonDeletionInProgress = "DeletionInProgress" +) + +var ( + ErrStillHasTenants = errors.New("still has tenants") +) + +func EnsureHasFinalizer(ctx context.Context, rt controller.Runtime, res *pbresource.Resource, statusKey string) error { + // The statusKey doubles as the finalizer name for tenancy resources. + if resource.HasFinalizer(res, statusKey) { + rt.Logger.Trace("already has finalizer") + return nil + } + + // Finalizer hasn't been written, so add it. + resource.AddFinalizer(res, statusKey) + _, err := rt.Client.Write(ctx, &pbresource.WriteRequest{Resource: res}) + if err != nil { + return WriteStatus(ctx, rt, res, statusKey, ConditionAccepted, ReasonEnsureHasFinalizerFailed, err) + } + rt.Logger.Trace("added finalizer") + return err +} + +func EnsureTenantsDeleted(ctx context.Context, rt controller.Runtime, registry resource.Registry, res *pbresource.Resource, tenantScope resource.Scope, tenancy *pbresource.Tenancy) error { + // Useful stats to keep track of on every sweep + numExistingHasFinalizer := 0 + numExistingOwned := 0 + numImmediateDeletes := 0 + numDeferredDeletes := 0 + + // List doesn't support querying across all types so iterate through each one. + for _, reg := range registry.Types() { + // Skip tenants that aren't scoped to the tenancy unit. + if reg.Scope != tenantScope { + continue + } + + // Get all tenants of the current type. + rsp, err := rt.Client.List(ctx, &pbresource.ListRequest{Type: reg.Type, Tenancy: tenancy}) + if err != nil { + return err + } + + if len(rsp.Resources) > 0 { + rt.Logger.Trace(fmt.Sprintf("found %d tenant %s", len(rsp.Resources), reg.Type.Kind)) + } + + // Delete each qualified tenant. + for _, tenant := range rsp.Resources { + // Owned resources will be deleted when the parent resource is deleted (tombstone reaper) + // so just skip over them. + if tenant.Owner != nil { + numExistingOwned++ + continue + } + + // Skip anything that is already marked for deletion and has finalizers + // since deletion of those resource is out of our control. + if resource.IsMarkedForDeletion(tenant) && resource.HasFinalizers(tenant) { + numExistingHasFinalizer++ + continue + } + + // Delete tenant with a blanket non-CAS delete since we don't care about the version that + // is deleted. Since we don't know whether the delete was immediate or deferred due to the + // presense of a finalizer, we can't assume that the tenant is really deleted. + _, err = rt.Client.Delete(ctx, &pbresource.DeleteRequest{Id: tenant.Id, Version: ""}) + if err != nil { + // Bail on the first sign of trouble and retry in future reconciles. + return err + } + + // Classify the just deleted tenant since we're not fully vacated if a deferred delete occurred. + if resource.HasFinalizers(tenant) { + rt.Logger.Trace(fmt.Sprintf("deferred delete of %s tenant %q", res.Id.Type.Kind, tenant.Id.Name)) + numDeferredDeletes++ + } else { + rt.Logger.Trace(fmt.Sprintf("immediate delete of %s tenant %q", res.Id.Type.Kind, tenant.Id.Name)) + numImmediateDeletes++ + } + } + } + + // Force re-reconcile if we have any lingering tenants by returning an error. + if numExistingOwned+numExistingHasFinalizer+numDeferredDeletes > 0 { + if numExistingOwned > 0 { + rt.Logger.Debug(fmt.Sprintf("delete blocked on %d remaining owned tenants", numExistingOwned)) + } + if numExistingHasFinalizer > 0 { + rt.Logger.Debug(fmt.Sprintf("delete blocked on %d remaining tenants with finalizers", numExistingHasFinalizer)) + } + if numDeferredDeletes > 0 { + rt.Logger.Debug(fmt.Sprintf("delete blocked on %d tenants which were just marked for deletion", numDeferredDeletes)) + } + return ErrStillHasTenants + } + + // We should have zero tenants and be good to continue. + rt.Logger.Debug("no tenants - green light the delete") + return nil +} + +// EnsureResourceDelete makes sure a tenancy unit (partition or namespace) with no tenants is finally deleted. +func EnsureResourceDeleted(ctx context.Context, rt controller.Runtime, res *pbresource.Resource, statusKey string) error { + // Remove finalizer if present + if resource.HasFinalizer(res, statusKey) { + resource.RemoveFinalizer(res, statusKey) + _, err := rt.Client.Write(ctx, &pbresource.WriteRequest{Resource: res}) + if err != nil { + rt.Logger.Error("failed write to remove finalizer") + return WriteStatus(ctx, rt, res, statusKey, ConditionDeleted, ReasonDeletionInProgress, err) + } + rt.Logger.Trace("removed finalizer") + } + + // Finally, delete the tenancy unit. + _, err := rt.Client.Delete(ctx, &pbresource.DeleteRequest{Id: res.Id}) + if err != nil { + rt.Logger.Error("failed final delete", "error", err) + return WriteStatus(ctx, rt, res, statusKey, ConditionDeleted, ReasonDeletionInProgress, err) + } + + // Success + rt.Logger.Trace("finally deleted") + return nil +} + +// WriteStatus writes the tenancy resource status only if the status has changed. +// The state and message are based on whether the passed in error is nil +// (state=TRUE, message="") or not (state=FALSE, message=error). The passed in +// error is always returned unless the delegated call to client.WriteStatus +// itself fails. +func WriteStatus(ctx context.Context, rt controller.Runtime, res *pbresource.Resource, statusKey string, condition string, reason string, err error) error { + state := pbresource.Condition_STATE_TRUE + message := "" + if err != nil { + state = pbresource.Condition_STATE_FALSE + message = err.Error() + } + + newStatus := &pbresource.Status{ + ObservedGeneration: res.Generation, + Conditions: []*pbresource.Condition{{ + Type: condition, + State: state, + Reason: reason, + Message: message, + }}, + } + + // Skip the write if the status hasn't changed to keep write amplificiation in check. + if resource.EqualStatus(res.Status[statusKey], newStatus, false) { + return err + } + + _, statusErr := rt.Client.WriteStatus(ctx, &pbresource.WriteStatusRequest{ + Id: res.Id, + Key: statusKey, + Status: newStatus, + }) + + if statusErr != nil { + rt.Logger.Error("failed writing status", "error", statusErr) + return statusErr + } + rt.Logger.Trace("wrote status", "status", newStatus) + return err +} diff --git a/internal/tenancy/internal/controllers/namespace/controller.go b/internal/tenancy/internal/controllers/namespace/controller.go new file mode 100644 index 000000000000..0d7a73e13574 --- /dev/null +++ b/internal/tenancy/internal/controllers/namespace/controller.go @@ -0,0 +1,95 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package namespace + +import ( + "context" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "github.com/hashicorp/consul/internal/controller" + "github.com/hashicorp/consul/internal/resource" + "github.com/hashicorp/consul/internal/tenancy/internal/controllers/common" + "github.com/hashicorp/consul/proto-public/pbresource" + pbtenancy "github.com/hashicorp/consul/proto-public/pbtenancy/v2beta1" +) + +const ( + // StatusKey also serves as the finalizer name. + StatusKey = "consul.io/namespace-controller" + + // Conditions and reasons are shared with partitions. See + // common.go for the full list. +) + +func Controller(registry resource.Registry) controller.Controller { + return controller.ForType(pbtenancy.NamespaceType). + WithReconciler(&Reconciler{Registry: registry}) +} + +type Reconciler struct { + Registry resource.Registry +} + +// Reconcile is responsible for reconciling a namespace resource. +// +// When a namespace is created, ensures a finalizer is added for cleanup. +// +// When a namespace is marked for deletion, ensures tenants are deleted and +// the finalizer is removed. +func (r *Reconciler) Reconcile(ctx context.Context, rt controller.Runtime, req controller.Request) error { + rt.Logger = rt.Logger.With("resource", req.ID.Name, "controller", "namespace") + + // Never reconcile the default namespace in the default partition since it is + // created on system startup or snapshot restoration. Resource validation rules + // protect them being deleted. + if req.ID.Tenancy.Partition == resource.DefaultPartitionName && req.ID.Name == resource.DefaultNamespaceName { + rt.Logger.Trace("skipping reconcile of default namespace") + return nil + } + + // Read namespace to make sure we have the latest version. + rsp, err := rt.Client.Read(ctx, &pbresource.ReadRequest{Id: req.ID}) + switch { + case status.Code(err) == codes.NotFound: + // Namespace deleted - nothing to do. + rt.Logger.Trace("namespace not found, nothing to do") + return nil + case err != nil: + rt.Logger.Error("failed read", "error", err) + return err + } + res := rsp.Resource + + if resource.IsMarkedForDeletion(res) { + return ensureDeleted(ctx, rt, r.Registry, res) + } + + if err = common.EnsureHasFinalizer(ctx, rt, res, StatusKey); err != nil { + return err + } + + return common.WriteStatus(ctx, rt, res, StatusKey, common.ConditionAccepted, common.ReasonAcceptedOK, err) +} + +func ensureDeleted(ctx context.Context, rt controller.Runtime, registry resource.Registry, res *pbresource.Resource) error { + tenancy := &pbresource.Tenancy{ + Partition: res.Id.Tenancy.Partition, + Namespace: res.Id.Name, + PeerName: resource.DefaultPeerName, + } + // Delete namespace scoped tenants + if err := common.EnsureTenantsDeleted(ctx, rt, registry, res, resource.ScopeNamespace, tenancy); err != nil { + rt.Logger.Error("failed deleting tenants", "error", err) + return common.WriteStatus(ctx, rt, res, StatusKey, common.ConditionDeleted, common.ReasonDeletionInProgress, err) + } + + // Delete namespace resource since all namespace scoped tenants are deleted + if err := common.EnsureResourceDeleted(ctx, rt, res, StatusKey); err != nil { + rt.Logger.Error("failed deleting namespace", "error", err) + return err + } + return nil +} diff --git a/internal/tenancy/internal/controllers/register_ce.go b/internal/tenancy/internal/controllers/register_ce.go index a1623e5e95b5..6d346de9b073 100644 --- a/internal/tenancy/internal/controllers/register_ce.go +++ b/internal/tenancy/internal/controllers/register_ce.go @@ -7,8 +7,9 @@ package controllers import ( "github.com/hashicorp/consul/internal/controller" + "github.com/hashicorp/consul/internal/tenancy/internal/controllers/namespace" ) func Register(mgr *controller.Manager, deps Dependencies) { - //mgr.Register(namespace.NamespaceController()) + mgr.Register(namespace.Controller(deps.Registry)) } diff --git a/internal/tenancy/tenancytest/namespace_controller_test.go b/internal/tenancy/tenancytest/namespace_controller_test.go new file mode 100644 index 000000000000..886224ed9455 --- /dev/null +++ b/internal/tenancy/tenancytest/namespace_controller_test.go @@ -0,0 +1,164 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package tenancytest + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + svc "github.com/hashicorp/consul/agent/grpc-external/services/resource" + svctest "github.com/hashicorp/consul/agent/grpc-external/services/resource/testing" + "github.com/hashicorp/consul/internal/controller" + "github.com/hashicorp/consul/internal/resource" + "github.com/hashicorp/consul/internal/resource/demo" + rtest "github.com/hashicorp/consul/internal/resource/resourcetest" + "github.com/hashicorp/consul/internal/tenancy" + "github.com/hashicorp/consul/internal/tenancy/internal/controllers/common" + "github.com/hashicorp/consul/internal/tenancy/internal/controllers/namespace" + "github.com/hashicorp/consul/proto-public/pbresource" + pbtenancy "github.com/hashicorp/consul/proto-public/pbtenancy/v2beta1" + pbdemo "github.com/hashicorp/consul/proto/private/pbdemo/v1" + "github.com/hashicorp/consul/sdk/testutil" + "github.com/hashicorp/consul/sdk/testutil/retry" +) + +// Due to a circular dependency, this test can't reside in the package next to the controller it is testing. + +type nsTestSuite struct { + suite.Suite + + client *rtest.Client + runtime controller.Runtime + ctx context.Context + registry resource.Registry +} + +func (ts *nsTestSuite) SetupTest() { + ts.registry = resource.NewRegistry() + tenancyBridge := tenancy.NewV2TenancyBridge() + config := svc.Config{TenancyBridge: tenancyBridge, Registry: ts.registry, UseV2Tenancy: true} + resourceClient := svctest.RunResourceServiceWithConfig(ts.T(), config, tenancy.RegisterTypes, demo.RegisterTypes) + tenancyBridge.WithClient(resourceClient) + ts.client = rtest.NewClient(resourceClient) + ts.runtime = controller.Runtime{Client: resourceClient, Logger: testutil.Logger(ts.T())} + ts.ctx = testutil.TestContext(ts.T()) + + mgr := controller.NewManager(ts.client, testutil.Logger(ts.T())) + mgr.Register(namespace.Controller(ts.registry)) + mgr.SetRaftLeader(true) + ctx, cancel := context.WithCancel(context.Background()) + ts.T().Cleanup(cancel) + go mgr.Run(ctx) +} + +func (ts *nsTestSuite) waitForReconciliation(id *pbresource.ID, reason string) { + ts.T().Helper() + + retry.Run(ts.T(), func(r *retry.R) { + rsp, err := ts.client.Read(context.Background(), &pbresource.ReadRequest{ + Id: id, + }) + require.NoError(r, err) + + status, found := rsp.Resource.Status[namespace.StatusKey] + require.True(r, found) + require.Len(r, status.Conditions, 1) + require.Equal(r, reason, status.Conditions[0].Reason) + }) +} + +func (ts *nsTestSuite) TestNamespaceController_HappyPath() { + // Create namespace ns1 + ns1 := rtest.Resource(pbtenancy.NamespaceType, "ns1"). + // Keep this CE friendly by using default partition + WithTenancy(resource.DefaultPartitionedTenancy()). + WithData(ts.T(), &pbtenancy.Namespace{Description: "namespace ns1"}). + Write(ts.T(), ts.client) + + // Wait for it to be accepted + ts.waitForReconciliation(ns1.Id, common.ReasonAcceptedOK) + + // Verify namespace finalizer added + ns1 = ts.client.RequireResourceMeta(ts.T(), ns1.Id, resource.FinalizerKey, namespace.StatusKey) + + // Add a namespace scoped tenant to the namespace + artist1 := rtest.Resource(demo.TypeV1Artist, "moonchild"). + WithTenancy(&pbresource.Tenancy{ + Partition: resource.DefaultPartitionName, + Namespace: ns1.Id.Name, + PeerName: resource.DefaultPeerName, + }). + WithData(ts.T(), &pbdemo.Artist{Name: "Moonchild"}). + Write(ts.T(), ts.client) + + // Delete the namespace + _, err := ts.client.Delete(ts.ctx, &pbresource.DeleteRequest{Id: ns1.Id}) + require.NoError(ts.T(), err) + + // Wait for the namespace to be deleted + ts.client.WaitForDeletion(ts.T(), ns1.Id) + + // Verify tenants deleted. + ts.client.RequireResourceNotFound(ts.T(), artist1.Id) +} + +func (ts *nsTestSuite) TestNamespaceController_DeleteBlockedByTenantsWithFinalizers() { + // Create namespace ns1 + ns1 := rtest.Resource(pbtenancy.NamespaceType, "ns1"). + WithTenancy(resource.DefaultPartitionedTenancy()). + WithData(ts.T(), &pbtenancy.Namespace{Description: "namespace ns1"}). + Write(ts.T(), ts.client) + + // Wait for it to be accepted + ts.waitForReconciliation(ns1.Id, common.ReasonAcceptedOK) + + // Add artist to namespace + _ = rtest.Resource(demo.TypeV1Artist, "weezer"). + WithTenancy(&pbresource.Tenancy{ + Partition: resource.DefaultPartitionName, + Namespace: ns1.Id.Name, + PeerName: resource.DefaultPeerName, + }). + WithData(ts.T(), &pbdemo.Artist{Name: "Weezer"}). + Write(ts.T(), ts.client) + + // Add another artist to namespace with a finalizer so that is blocks namespace deletion. + artist2 := rtest.Resource(demo.TypeV1Artist, "foofighters"). + WithTenancy(&pbresource.Tenancy{ + Partition: resource.DefaultPartitionName, + Namespace: ns1.Id.Name, + PeerName: resource.DefaultPeerName, + }). + WithData(ts.T(), &pbdemo.Artist{Name: "Foo Fighters"}). + WithMeta(resource.FinalizerKey, "finalizer2"). + Write(ts.T(), ts.client) + + // Delete the namespace - this activates the controller logic to delete all tenants + ts.client.Delete(ts.ctx, &pbresource.DeleteRequest{Id: ns1.Id}) + + // Delete should be blocked by artist2 tenant with finalizer + ts.client.WaitForStatusConditionAnyGen(ts.T(), ns1.Id, namespace.StatusKey, &pbresource.Condition{ + Type: common.ConditionDeleted, + State: pbresource.Condition_STATE_FALSE, + Reason: common.ReasonDeletionInProgress, + Message: common.ErrStillHasTenants.Error(), + }) + + // Remove the finalizer on artist2 to unblock deletion of ns1 + artist2 = ts.client.RequireResourceExists(ts.T(), artist2.Id) + resource.RemoveFinalizer(artist2, "finalizer2") + _, err := ts.client.Write(ts.ctx, &pbresource.WriteRequest{Resource: artist2}) + require.NoError(ts.T(), err) + + // The final reconcile should delete artist since it was marked for deletion and + // and has no finalizers. Given no more tenants, wait for namespace to be deleted. + ts.client.WaitForDeletion(ts.T(), ns1.Id) +} + +func TestNamespaceControllerSuite(t *testing.T) { + suite.Run(t, new(nsTestSuite)) +}