From cdc5d12c08a2de25ad02b4514db23375da349baf Mon Sep 17 00:00:00 2001 From: Xuecheng Zhang Date: Mon, 26 Apr 2021 14:39:55 +0800 Subject: [PATCH 1/4] e2e: dump tidb-operator's log before uninstall tidb-operator (#3936) --- tests/e2e/e2e.go | 27 ++++++++++++++++++++++++++- tests/log_dump.go | 7 ++++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/tests/e2e/e2e.go b/tests/e2e/e2e.go index 212c2b80da..427d55bd71 100644 --- a/tests/e2e/e2e.go +++ b/tests/e2e/e2e.go @@ -21,6 +21,7 @@ import ( "os" "os/exec" "path" + "path/filepath" "testing" "time" @@ -339,8 +340,32 @@ var _ = ginkgo.SynchronizedAfterSuite(func() { err := tidbcluster.DeleteCertManager(kubeCli) framework.ExpectNoError(err, "failed to delete cert-manager") - ginkgo.By("Uninstalling tidb-operator") ocfg := e2econfig.NewDefaultOperatorConfig(e2econfig.TestConfig) + + // kubetest2 can only dump running pods' log (copy from container log directory), + // but if we want to get test coverage reports for tidb-operator, we need to shutdown the processes/pods), + // so we choose to copy logs before uninstall tidb-operator. + // NOTE: if we can get the whole test result from all parallel Ginkgo nodes with Ginkgo v2 later, we can also choose to: + // - dump logs if the test failed. + // - (uninstall tidb-operator and) generate test coverage reports if the test passed. + // ref: https://github.com/onsi/ginkgo/issues/361#issuecomment-814203240 + + if framework.TestContext.ReportDir != "" { + ginkgo.By("Dumping logs for tidb-operator") + logPath := filepath.Join(framework.TestContext.ReportDir, "logs", "tidb-operator") + // full permission (0777) for the log directory to avoid "permission denied" for later kubetest2 log dump. + framework.ExpectNoError(os.MkdirAll(logPath, 0777), "failed to create log directory for tidb-operator components") + + podList, err2 := kubeCli.CoreV1().Pods(ocfg.Namespace).List(metav1.ListOptions{}) + framework.ExpectNoError(err2, "failed to list pods for tidb-operator") + for _, pod := range podList.Items { + log.Logf("dumping logs for pod %s/%s", pod.Namespace, pod.Name) + err2 = tests.DumpPod(logPath, &pod) + framework.ExpectNoError(err2, "failed to dump log for pod %s/%s", pod.Namespace, pod.Name) + } + } + + ginkgo.By("Uninstalling tidb-operator") err = tests.CleanOperator(ocfg) framework.ExpectNoError(err, "failed to uninstall operator") diff --git a/tests/log_dump.go b/tests/log_dump.go index f69905beee..9f5368c4da 100644 --- a/tests/log_dump.go +++ b/tests/log_dump.go @@ -59,7 +59,7 @@ func (oa *OperatorActions) DumpAllLogs(operatorInfo *OperatorConfig, testCluster return err } for _, pod := range operatorPods.Items { - err := dumpPod(logPath, &pod) + err := DumpPod(logPath, &pod) if err != nil { return err } @@ -74,7 +74,7 @@ func (oa *OperatorActions) DumpAllLogs(operatorInfo *OperatorConfig, testCluster return err } for _, pod := range clusterPodList.Items { - err := dumpPod(logPath, &pod) + err := DumpPod(logPath, &pod) if err != nil { return err } @@ -86,7 +86,8 @@ func (oa *OperatorActions) DumpAllLogs(operatorInfo *OperatorConfig, testCluster return nil } -func dumpPod(logPath string, pod *corev1.Pod) error { +// DumpPod dumps logs for a pod. 
+func DumpPod(logPath string, pod *corev1.Pod) error { logFile, err := os.Create(filepath.Join(logPath, fmt.Sprintf("%s-%s.log", pod.Name, pod.Namespace))) if err != nil { return err From 4087cbb7765018a0cd8d802a2ee023fa50f3ff54 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Mon, 26 Apr 2021 15:43:57 +0800 Subject: [PATCH 2/4] Support pod topology spread constraint (#3937) --- docs/api-references/docs.md | 129 ++++++++++ examples/advanced/tidb-cluster.yaml | 33 +++ manifests/crd.yaml | 30 +++ pkg/apis/pingcap/v1alpha1/dmcluster_test.go | 10 +- .../pingcap/v1alpha1/openapi_generated.go | 231 +++++++++++++++++- .../pingcap/v1alpha1/tidbcluster_component.go | 165 ++++++++++--- pkg/apis/pingcap/v1alpha1/tidbcluster_test.go | 10 +- pkg/apis/pingcap/v1alpha1/types.go | 42 ++++ .../pingcap/v1alpha1/zz_generated.deepcopy.go | 31 +++ pkg/manager/member/pump_member_manager.go | 41 +--- tests/dt.go | 57 +++-- tests/e2e/e2e.go | 6 + tests/e2e/tidbcluster/tidbcluster.go | 98 +++++++- 13 files changed, 783 insertions(+), 100 deletions(-) diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md index a2647341a4..55a314b2e6 100644 --- a/docs/api-references/docs.md +++ b/docs/api-references/docs.md @@ -852,6 +852,23 @@ Kubernetes core/v1.PodSecurityContext

PodSecurityContext of the component

+ + +topologySpreadConstraints
+ + +[]TopologySpreadConstraint + + + + +(Optional) +

TopologySpreadConstraints describes how a group of pods ought to spread across topology +domains. Scheduler will schedule pods in a way which abides by the constraints. +This field is only honored by clusters that enable the EvenPodsSpread feature. +All topologySpreadConstraints are ANDed.

+ + @@ -1663,6 +1680,23 @@ Kubernetes core/v1.PodSecurityContext

PodSecurityContext of the component

+ + +topologySpreadConstraints
+ + +[]TopologySpreadConstraint + + + + +(Optional) +

TopologySpreadConstraints describes how a group of pods ought to spread across topology +domains. Scheduler will schedule pods in a way which abides by the constraints. +This field is only honored by clusters that enable the EvenPodsSpread feature. +All topologySpreadConstraints are ANDed.

+ + @@ -3769,6 +3803,10 @@ FlashSecurity +

Component

+

+

Component defines component identity of all components

+

ComponentAccessor

ComponentAccessor is the interface to access component details, which respects the cluster-level properties @@ -4090,6 +4128,23 @@ employed to update Pods in the StatefulSet when a revision is made to Template.

+ + +topologySpreadConstraints
+ + +[]TopologySpreadConstraint + + + + +(Optional) +

TopologySpreadConstraints describes how a group of pods ought to spread across topology +domains. Scheduler will schedule pods in a way which abides by the constraints. +This field is only honored by clusters that enable the EvenPodsSpread feature. +All topologySpreadConstraints are ANDed.

+ +

ConfigMapRef

@@ -4776,6 +4831,23 @@ Kubernetes core/v1.PodSecurityContext

PodSecurityContext of the component

+ + +topologySpreadConstraints
+ + +[]TopologySpreadConstraint + + + + +(Optional) +

TopologySpreadConstraints describes how a group of pods ought to spread across topology +domains. Scheduler will schedule pods in a way which abides by the constraints. +This field is only honored by clusters that enable the EvenPodsSpread feature. +All topologySpreadConstraints are ANDed.

+ +

DMClusterStatus

@@ -19932,6 +20004,23 @@ Kubernetes core/v1.PodSecurityContext

PodSecurityContext of the component

+ + +topologySpreadConstraints
+ + +[]TopologySpreadConstraint + + + + +(Optional) +

TopologySpreadConstraints describes how a group of pods ought to spread across topology +domains. Scheduler will schedule pods in a way which abides by the constraints. +This field is only honored by clusters that enable the EvenPodsSpread feature. +All topologySpreadConstraints are ANDed.

+ +

TidbClusterStatus

@@ -20774,6 +20863,46 @@ BasicAutoScalerStatus +

TopologySpreadConstraint

+

+(Appears on: +ComponentSpec, +DMClusterSpec, +TidbClusterSpec) +

+

+

TopologySpreadConstraint specifies how to spread matching pods among the given topology. +It is a minimal version of corev1.TopologySpreadConstraint, kept small to avoid adding too many API fields. +Refer to https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints

+

+ + + + + + + + + + + + + +
FieldDescription
+topologyKey
+ +string + +
+

TopologyKey is the key of node labels. Nodes that have a label with this key +and identical values are considered to be in the same topology. +We consider each topology domain as a “bucket”, and try to put a balanced number +of pods into each bucket. +MaxSkew defaults to 1, +WhenUnsatisfiable defaults to DoNotSchedule, +and the LabelSelector is generated from the component type. +See pkg/apis/pingcap/v1alpha1/tidbcluster_component.go#TopologySpreadConstraints()

+
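For illustration only (not part of this patch): a minimal sketch of how the expansion described above could look in Go, assuming the operator's component and instance label keys are `app.kubernetes.io/component` and `app.kubernetes.io/instance`. Only `topologyKey` comes from the user; the remaining fields are filled with the documented defaults.

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// expandConstraint sketches the expansion described above: only topologyKey is
// user-facing; MaxSkew, WhenUnsatisfiable and the LabelSelector are filled in
// with the documented defaults. The label keys are assumptions for this example.
func expandConstraint(topologyKey, instance, component string) corev1.TopologySpreadConstraint {
	return corev1.TopologySpreadConstraint{
		MaxSkew:           1,                    // documented default
		TopologyKey:       topologyKey,          // the only field exposed in the minimal API
		WhenUnsatisfiable: corev1.DoNotSchedule, // documented default
		LabelSelector: &metav1.LabelSelector{
			MatchLabels: map[string]string{
				"app.kubernetes.io/instance":  instance,  // assumed instance label key
				"app.kubernetes.io/component": component, // assumed component label key
			},
		},
	}
}

func main() {
	tsc := expandConstraint("topology.kubernetes.io/zone", "basic", "tikv")
	fmt.Printf("%+v\n", tsc)
}
```

In a TidbCluster manifest this corresponds to setting `topologySpreadConstraints: [{topologyKey: topology.kubernetes.io/zone}]` at either the cluster level or on an individual component, with the component-level setting taking precedence (see the accessor change in tidbcluster_component.go below).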

TxnLocalLatches

(Appears on: diff --git a/examples/advanced/tidb-cluster.yaml b/examples/advanced/tidb-cluster.yaml index 5eb04c9036..6d063274d6 100644 --- a/examples/advanced/tidb-cluster.yaml +++ b/examples/advanced/tidb-cluster.yaml @@ -136,6 +136,12 @@ spec: ## Set this in the case where a TiDB cluster is deployed across multiple Kubernetes clusters. default to empty. # clusterDomain: cluster.local + ## TopologySpreadConstraints for pod scheduling, will be overwritten by each cluster component's specific spread constraints setting + ## Can refer to PD/TiDB/TiKV/TiCDC/TiFlash/Pump topologySpreadConstraints settings, and ensure only cluster-scope general settings here + ## Ref: pkg/apis/pingcap/v1alpha1/types.go#TopologySpreadConstraint + # topologySpreadConstraints: + # - topologyKey: topology.kubernetes.io/zone + ########################### # TiDB Cluster Components # ########################### @@ -315,6 +321,11 @@ spec: ## Ref: https://docs.pingcap.com/tidb-in-kubernetes/stable/enable-tls-for-mysql-client # tlsClientSecretName: custom-tidb-client-secret-name + ## TopologySpreadConstraints for pod scheduling, will overwrite cluster level spread constraints setting + ## Ref: pkg/apis/pingcap/v1alpha1/types.go#TopologySpreadConstraint + # topologySpreadConstraints: + # - topologyKey: topology.kubernetes.io/zone + tidb: ############################ # Basic TiDB Configuration # @@ -528,6 +539,11 @@ spec: # - tidb # topologyKey: kubernetes.io/hostname + ## TopologySpreadConstraints for pod scheduling, will overwrite cluster level spread constraints setting + ## Ref: pkg/apis/pingcap/v1alpha1/types.go#TopologySpreadConstraint + # topologySpreadConstraints: + # - topologyKey: topology.kubernetes.io/zone + tikv: ############################ # Basic TiKV Configuration # @@ -721,6 +737,11 @@ spec: # - tikv # topologyKey: kubernetes.io/hostname + ## TopologySpreadConstraints for pod scheduling, will overwrite cluster level spread constraints setting + ## Ref: pkg/apis/pingcap/v1alpha1/types.go#TopologySpreadConstraint + # topologySpreadConstraints: + # - topologyKey: topology.kubernetes.io/zone + ## Deploy TiDB Binlog of a TiDB cluster ## Ref: https://pingcap.com/docs/tidb-in-kubernetes/stable/deploy-tidb-binlog/#deploy-pump # pump: @@ -757,6 +778,10 @@ spec: # # Ref: https://docs.pingcap.com/tidb/stable/tidb-binlog-configuration-file#pump # config: | # gc = 7 + # # TopologySpreadConstraints for pod scheduling, will overwrite cluster level spread constraints setting + # # Ref: pkg/apis/pingcap/v1alpha1/types.go#TopologySpreadConstraint + # topologySpreadConstraints: + # - topologyKey: topology.kubernetes.io/zone ## TiCDC is a tool for replicating the incremental data of TiDB ## Ref: https://pingcap.com/docs/tidb-in-kubernetes/stable/deploy-ticdc/ @@ -796,6 +821,10 @@ spec: # gcTTL: 86400 # logLevel: info # logFile: /dev/stderr + # # TopologySpreadConstraints for pod scheduling, will overwrite cluster level spread constraints setting + # # Ref: pkg/apis/pingcap/v1alpha1/types.go#TopologySpreadConstraint + # topologySpreadConstraints: + # - topologyKey: topology.kubernetes.io/zone ## TiFlash is the columnar storage extension of TiKV ## Ref: https://pingcap.com/docs/tidb-in-kubernetes/stable/deploy-tiflash/ @@ -869,3 +898,7 @@ spec: # proxy: | # [security] # cert-allowed-cn = CNNAME + # # TopologySpreadConstraints for pod scheduling, will overwrite the cluster level spread constraints setting + # # Ref: pkg/apis/pingcap/v1alpha1/types.go#TopologySpreadConstraint + # topologySpreadConstraints: + # - topologyKey: 
topology.kubernetes.io/zone diff --git a/manifests/crd.yaml b/manifests/crd.yaml index 35fa89bb83..f583f75861 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -2433,6 +2433,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string required: @@ -4517,6 +4520,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string required: @@ -6565,6 +6571,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string required: @@ -8754,6 +8763,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string required: @@ -10807,6 +10819,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string required: @@ -12868,6 +12883,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string required: @@ -12892,6 +12910,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string type: object @@ -15283,6 +15304,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string required: @@ -15372,6 +15396,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string worker: @@ -17424,6 +17451,9 @@ spec: type: string type: object type: array + topologySpreadConstraints: + items: {} + type: array version: type: string required: diff --git a/pkg/apis/pingcap/v1alpha1/dmcluster_test.go b/pkg/apis/pingcap/v1alpha1/dmcluster_test.go index 570f8dc0df..b27fc49b7e 100644 --- a/pkg/apis/pingcap/v1alpha1/dmcluster_test.go +++ b/pkg/apis/pingcap/v1alpha1/dmcluster_test.go @@ -98,6 +98,7 @@ func TestDMMasterIsAvailable(t *testing.T) { } } +// TODO: refector test of buildDMClusterComponentAccessor func TestDMComponentAccessor(t *testing.T) { g := NewGomegaWithT(t) @@ -110,7 +111,14 @@ func TestDMComponentAccessor(t *testing.T) { testFn := func(test *testcase, t *testing.T) { t.Log(test.name) - accessor := buildDMClusterComponentAccessor(test.cluster, test.component) + dc := &DMCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: *test.cluster, + } + + accessor := buildDMClusterComponentAccessor(ComponentDMMaster, dc, test.component) test.expectFn(g, accessor) } affinity := &corev1.Affinity{ diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index 32ab3b0eb0..0d85010b09 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -1471,11 +1471,30 @@ func schema_pkg_apis_pingcap_v1alpha1_ComponentSpec(ref common.ReferenceCallback Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. 
All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, }, }, }, Dependencies: []string{ - "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount"}, } } @@ -1759,11 +1778,30 @@ func schema_pkg_apis_pingcap_v1alpha1_DMClusterSpec(ref common.ReferenceCallback Ref: ref("k8s.io/api/core/v1.PodSecurityContext"), }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, }, }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.DMDiscoverySpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.MasterSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TLSCluster", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.WorkerSpec", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.DMDiscoverySpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.MasterSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TLSCluster", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.WorkerSpec", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration"}, } } @@ -3172,6 +3210,25 @@ func schema_pkg_apis_pingcap_v1alpha1_MasterSpec(ref common.ReferenceCallback) c Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. 
All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, "limits": { SchemaProps: spec.SchemaProps{ Description: "Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/", @@ -3259,7 +3316,7 @@ func schema_pkg_apis_pingcap_v1alpha1_MasterSpec(ref common.ReferenceCallback) c }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.MasterConfig", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.MasterServiceSpec", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.MasterConfig", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.MasterServiceSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, } } @@ -4335,6 +4392,25 @@ func schema_pkg_apis_pingcap_v1alpha1_PDSpec(ref common.ReferenceCallback) commo Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, "limits": { SchemaProps: spec.SchemaProps{ Description: "Limits describes the maximum amount of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/", @@ -4456,7 +4532,7 @@ func schema_pkg_apis_pingcap_v1alpha1_PDSpec(ref common.ReferenceCallback) commo }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDConfigWraper", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.ServiceSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.StorageVolume", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDConfigWraper", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.ServiceSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.StorageVolume", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, } } @@ -5084,6 +5160,25 @@ func schema_pkg_apis_pingcap_v1alpha1_PumpSpec(ref common.ReferenceCallback) com Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, "limits": { SchemaProps: spec.SchemaProps{ Description: "Limits describes the maximum amount of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/", @@ -5151,7 +5246,7 @@ func schema_pkg_apis_pingcap_v1alpha1_PumpSpec(ref common.ReferenceCallback) com }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/util/config.GenericConfig", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "github.com/pingcap/tidb-operator/pkg/util/config.GenericConfig", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, } } @@ -6357,6 +6452,25 @@ func schema_pkg_apis_pingcap_v1alpha1_TiCDCSpec(ref common.ReferenceCallback) co Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, "limits": { SchemaProps: spec.SchemaProps{ Description: "Limits describes the maximum amount of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/", @@ -6431,7 +6545,7 @@ func schema_pkg_apis_pingcap_v1alpha1_TiCDCSpec(ref common.ReferenceCallback) co }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiCDCConfig", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiCDCConfig", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, } } @@ -7110,6 +7224,25 @@ func schema_pkg_apis_pingcap_v1alpha1_TiDBSpec(ref common.ReferenceCallback) com Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, "limits": { SchemaProps: spec.SchemaProps{ Description: "Limits describes the maximum amount of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/", @@ -7262,7 +7395,7 @@ func schema_pkg_apis_pingcap_v1alpha1_TiDBSpec(ref common.ReferenceCallback) com }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.StorageVolume", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBConfigWraper", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBProbe", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBServiceSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBSlowLogTailerSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBTLSClient", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.Lifecycle", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.StorageVolume", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBConfigWraper", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBProbe", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBServiceSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBSlowLogTailerSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBTLSClient", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.Lifecycle", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, } } @@ -7489,6 +7622,25 @@ func schema_pkg_apis_pingcap_v1alpha1_TiFlashSpec(ref common.ReferenceCallback) Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, "limits": { SchemaProps: spec.SchemaProps{ Description: "Limits describes the maximum amount of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/", @@ -7589,7 +7741,7 @@ func schema_pkg_apis_pingcap_v1alpha1_TiFlashSpec(ref common.ReferenceCallback) }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.LogTailerSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.StorageClaim", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiFlashConfigWraper", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.LogTailerSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.StorageClaim", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiFlashConfigWraper", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, } } @@ -9745,6 +9897,25 @@ func schema_pkg_apis_pingcap_v1alpha1_TiKVSpec(ref common.ReferenceCallback) com Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, "limits": { SchemaProps: spec.SchemaProps{ Description: "Limits describes the maximum amount of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/", @@ -9908,7 +10079,7 @@ func schema_pkg_apis_pingcap_v1alpha1_TiKVSpec(ref common.ReferenceCallback) com }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.LogTailerSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.StorageVolume", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiKVConfigWraper", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.LogTailerSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.StorageVolume", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiKVConfigWraper", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, } } @@ -10834,11 +11005,30 @@ func schema_pkg_apis_pingcap_v1alpha1_TidbClusterSpec(ref common.ReferenceCallba Ref: ref("k8s.io/api/core/v1.PodSecurityContext"), }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. 
All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, }, }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.DiscoverySpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.HelperSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PumpSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TLSCluster", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiCDCSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiFlashSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiKVSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TidbClusterRef", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.DiscoverySpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.HelperSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PDSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.PumpSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TLSCluster", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiCDCSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiDBSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiFlashSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TiKVSpec", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TidbClusterRef", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration"}, } } @@ -11814,6 +12004,25 @@ func schema_pkg_apis_pingcap_v1alpha1_WorkerSpec(ref common.ReferenceCallback) c Format: "", }, }, + "topologySpreadConstraints": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": "topologyKey", + "x-kubernetes-list-type": "map", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "TopologySpreadConstraints describes how a group of pods ought to spread across topology domains. Scheduler will schedule pods in a way which abides by the constraints. This field is is only honored by clusters that enables the EvenPodsSpread feature. All topologySpreadConstraints are ANDed.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Ref: ref("github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint"), + }, + }, + }, + }, + }, "limits": { SchemaProps: spec.SchemaProps{ Description: "Limits describes the maximum amount of compute resources allowed. 
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/", @@ -11902,7 +12111,7 @@ func schema_pkg_apis_pingcap_v1alpha1_WorkerSpec(ref common.ReferenceCallback) c }, }, Dependencies: []string{ - "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.WorkerConfig", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, + "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.TopologySpreadConstraint", "github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1.WorkerConfig", "k8s.io/api/core/v1.Affinity", "k8s.io/api/core/v1.Container", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.LocalObjectReference", "k8s.io/api/core/v1.PodSecurityContext", "k8s.io/api/core/v1.Toleration", "k8s.io/api/core/v1.Volume", "k8s.io/api/core/v1.VolumeMount", "k8s.io/apimachinery/pkg/api/resource.Quantity"}, } } diff --git a/pkg/apis/pingcap/v1alpha1/tidbcluster_component.go b/pkg/apis/pingcap/v1alpha1/tidbcluster_component.go index ce41459920..f53e4530be 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbcluster_component.go +++ b/pkg/apis/pingcap/v1alpha1/tidbcluster_component.go @@ -14,8 +14,10 @@ package v1alpha1 import ( + "github.com/pingcap/tidb-operator/pkg/label" apps "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const ( @@ -45,9 +47,30 @@ type ComponentAccessor interface { AdditionalVolumeMounts() []corev1.VolumeMount TerminationGracePeriodSeconds() *int64 StatefulSetUpdateStrategy() apps.StatefulSetUpdateStrategyType + TopologySpreadConstraints() []corev1.TopologySpreadConstraint } +// Component defines component identity of all components +type Component int + +const ( + ComponentPD Component = iota + ComponentTiDB + ComponentTiKV + ComponentTiFlash + ComponentTiCDC + ComponentPump + ComponentDiscovery + ComponentDMDiscovery + ComponentDMMaster + ComponentDMWorker +) + type componentAccessorImpl struct { + component Component + name string + kind string + imagePullPolicy corev1.PullPolicy imagePullSecrets []corev1.LocalObjectReference hostNetwork *bool @@ -60,6 +83,7 @@ type componentAccessorImpl struct { configUpdateStrategy ConfigUpdateStrategy statefulSetUpdateStrategy apps.StatefulSetUpdateStrategyType podSecurityContext *corev1.PodSecurityContext + topologySpreadConstraints []TopologySpreadConstraint // ComponentSpec is the Component Spec ComponentSpec *ComponentSpec @@ -185,13 +209,14 @@ func (a *componentAccessorImpl) ConfigUpdateStrategy() ConfigUpdateStrategy { func (a *componentAccessorImpl) BuildPodSpec() corev1.PodSpec { spec := corev1.PodSpec{ - SchedulerName: a.SchedulerName(), - Affinity: a.Affinity(), - NodeSelector: a.NodeSelector(), - HostNetwork: a.HostNetwork(), - RestartPolicy: corev1.RestartPolicyAlways, - Tolerations: a.Tolerations(), - SecurityContext: a.PodSecurityContext(), + SchedulerName: a.SchedulerName(), + Affinity: a.Affinity(), + NodeSelector: a.NodeSelector(), + HostNetwork: a.HostNetwork(), + RestartPolicy: corev1.RestartPolicyAlways, + Tolerations: a.Tolerations(), + SecurityContext: a.PodSecurityContext(), + TopologySpreadConstraints: a.TopologySpreadConstraints(), } if a.PriorityClassName() != nil { spec.PriorityClassName = *a.PriorityClassName() @@ -247,8 +272,73 @@ func (a *componentAccessorImpl) 
TerminationGracePeriodSeconds() *int64 { return a.ComponentSpec.TerminationGracePeriodSeconds } -func buildTidbClusterComponentAccessor(spec *TidbClusterSpec, componentSpec *ComponentSpec) ComponentAccessor { +func (a *componentAccessorImpl) TopologySpreadConstraints() []corev1.TopologySpreadConstraint { + tscs := a.topologySpreadConstraints + if a.ComponentSpec != nil && len(a.ComponentSpec.TopologySpreadConstraints) > 0 { + tscs = a.ComponentSpec.TopologySpreadConstraints + } + + if len(tscs) == 0 { + return nil + } + + ptscs := make([]corev1.TopologySpreadConstraint, 0, len(tscs)) + for _, tsc := range tscs { + ptsc := corev1.TopologySpreadConstraint{ + MaxSkew: 1, + TopologyKey: tsc.TopologyKey, + WhenUnsatisfiable: corev1.DoNotSchedule, + } + componentLabelVal := getComponentLabelValue(a.component) + var l label.Label + switch a.kind { + case TiDBClusterKind: + l = label.New() + case DMClusterKind: + l = label.NewDM() + } + l[label.ComponentLabelKey] = componentLabelVal + l[label.InstanceLabelKey] = a.name + ptsc.LabelSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string(l), + } + ptscs = append(ptscs, ptsc) + } + return ptscs +} + +func getComponentLabelValue(c Component) string { + switch c { + case ComponentPD: + return label.PDLabelVal + case ComponentTiDB: + return label.TiDBLabelVal + case ComponentTiKV: + return label.TiKVLabelVal + case ComponentTiFlash: + return label.TiFlashLabelVal + case ComponentTiCDC: + return label.TiCDCLabelVal + case ComponentPump: + return label.PumpLabelVal + case ComponentDiscovery: + return label.DiscoveryLabelVal + case ComponentDMDiscovery: + return label.DiscoveryLabelVal + case ComponentDMMaster: + return label.DMMasterLabelVal + case ComponentDMWorker: + return label.DMWorkerLabelVal + } + return "" +} + +func buildTidbClusterComponentAccessor(c Component, tc *TidbCluster, componentSpec *ComponentSpec) ComponentAccessor { + spec := &tc.Spec return &componentAccessorImpl{ + name: tc.Name, + kind: TiDBClusterKind, + component: c, imagePullPolicy: spec.ImagePullPolicy, imagePullSecrets: spec.ImagePullSecrets, hostNetwork: spec.HostNetwork, @@ -261,24 +351,30 @@ func buildTidbClusterComponentAccessor(spec *TidbClusterSpec, componentSpec *Com configUpdateStrategy: spec.ConfigUpdateStrategy, statefulSetUpdateStrategy: spec.StatefulSetUpdateStrategy, podSecurityContext: spec.PodSecurityContext, + topologySpreadConstraints: spec.TopologySpreadConstraints, ComponentSpec: componentSpec, } } -func buildDMClusterComponentAccessor(spec *DMClusterSpec, componentSpec *ComponentSpec) ComponentAccessor { +func buildDMClusterComponentAccessor(c Component, dc *DMCluster, componentSpec *ComponentSpec) ComponentAccessor { + spec := &dc.Spec return &componentAccessorImpl{ - imagePullPolicy: spec.ImagePullPolicy, - imagePullSecrets: spec.ImagePullSecrets, - hostNetwork: spec.HostNetwork, - affinity: spec.Affinity, - priorityClassName: spec.PriorityClassName, - schedulerName: spec.SchedulerName, - clusterNodeSelector: spec.NodeSelector, - clusterAnnotations: spec.Annotations, - tolerations: spec.Tolerations, - configUpdateStrategy: ConfigUpdateStrategyRollingUpdate, - podSecurityContext: spec.PodSecurityContext, + name: dc.Name, + kind: DMClusterKind, + component: c, + imagePullPolicy: spec.ImagePullPolicy, + imagePullSecrets: spec.ImagePullSecrets, + hostNetwork: spec.HostNetwork, + affinity: spec.Affinity, + priorityClassName: spec.PriorityClassName, + schedulerName: spec.SchedulerName, + clusterNodeSelector: spec.NodeSelector, + clusterAnnotations: 
spec.Annotations, + tolerations: spec.Tolerations, + configUpdateStrategy: ConfigUpdateStrategyRollingUpdate, + podSecurityContext: spec.PodSecurityContext, + topologySpreadConstraints: spec.TopologySpreadConstraints, ComponentSpec: componentSpec, } @@ -287,52 +383,47 @@ func buildDMClusterComponentAccessor(spec *DMClusterSpec, componentSpec *Compone // BaseDiscoverySpec returns the base spec of discovery component func (tc *TidbCluster) BaseDiscoverySpec() ComponentAccessor { // all configs follow global one - return buildTidbClusterComponentAccessor(&tc.Spec, nil) + return buildTidbClusterComponentAccessor(ComponentDiscovery, tc, nil) } // BaseTiDBSpec returns the base spec of TiDB servers func (tc *TidbCluster) BaseTiDBSpec() ComponentAccessor { - return buildTidbClusterComponentAccessor(&tc.Spec, &tc.Spec.TiDB.ComponentSpec) + return buildTidbClusterComponentAccessor(ComponentTiDB, tc, &tc.Spec.TiDB.ComponentSpec) } // BaseTiKVSpec returns the base spec of TiKV servers func (tc *TidbCluster) BaseTiKVSpec() ComponentAccessor { - return buildTidbClusterComponentAccessor(&tc.Spec, &tc.Spec.TiKV.ComponentSpec) + return buildTidbClusterComponentAccessor(ComponentTiKV, tc, &tc.Spec.TiKV.ComponentSpec) } // BaseTiFlashSpec returns the base spec of TiFlash servers func (tc *TidbCluster) BaseTiFlashSpec() ComponentAccessor { - return buildTidbClusterComponentAccessor(&tc.Spec, &tc.Spec.TiFlash.ComponentSpec) + return buildTidbClusterComponentAccessor(ComponentTiFlash, tc, &tc.Spec.TiFlash.ComponentSpec) } // BaseTiCDCSpec returns the base spec of TiCDC servers func (tc *TidbCluster) BaseTiCDCSpec() ComponentAccessor { - return buildTidbClusterComponentAccessor(&tc.Spec, &tc.Spec.TiCDC.ComponentSpec) + return buildTidbClusterComponentAccessor(ComponentTiCDC, tc, &tc.Spec.TiCDC.ComponentSpec) } // BasePDSpec returns the base spec of PD servers func (tc *TidbCluster) BasePDSpec() ComponentAccessor { - return buildTidbClusterComponentAccessor(&tc.Spec, &tc.Spec.PD.ComponentSpec) + return buildTidbClusterComponentAccessor(ComponentPD, tc, &tc.Spec.PD.ComponentSpec) } -// BasePumpSpec returns two results: -// 1. the base pump spec, if exists. -// 2. whether the base pump spec exists. 
-func (tc *TidbCluster) BasePumpSpec() (ComponentAccessor, bool) { - if tc.Spec.Pump == nil { - return nil, false - } - return buildTidbClusterComponentAccessor(&tc.Spec, &tc.Spec.Pump.ComponentSpec), true +// BasePumpSpec returns the base spec of Pump: +func (tc *TidbCluster) BasePumpSpec() ComponentAccessor { + return buildTidbClusterComponentAccessor(ComponentPump, tc, &tc.Spec.Pump.ComponentSpec) } func (dc *DMCluster) BaseDiscoverySpec() ComponentAccessor { - return buildDMClusterComponentAccessor(&dc.Spec, nil) + return buildDMClusterComponentAccessor(ComponentDMDiscovery, dc, nil) } func (dc *DMCluster) BaseMasterSpec() ComponentAccessor { - return buildDMClusterComponentAccessor(&dc.Spec, &dc.Spec.Master.ComponentSpec) + return buildDMClusterComponentAccessor(ComponentDMMaster, dc, &dc.Spec.Master.ComponentSpec) } func (dc *DMCluster) BaseWorkerSpec() ComponentAccessor { - return buildDMClusterComponentAccessor(&dc.Spec, &dc.Spec.Worker.ComponentSpec) + return buildDMClusterComponentAccessor(ComponentDMWorker, dc, &dc.Spec.Worker.ComponentSpec) } diff --git a/pkg/apis/pingcap/v1alpha1/tidbcluster_test.go b/pkg/apis/pingcap/v1alpha1/tidbcluster_test.go index c7080d2ca1..9f36b958b0 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbcluster_test.go +++ b/pkg/apis/pingcap/v1alpha1/tidbcluster_test.go @@ -249,6 +249,7 @@ func TestTiKVIsAvailable(t *testing.T) { } } +// TODO: refector test of buildTidbClusterComponentAccessor func TestComponentAccessor(t *testing.T) { g := NewGomegaWithT(t) @@ -261,7 +262,14 @@ func TestComponentAccessor(t *testing.T) { testFn := func(test *testcase, t *testing.T) { t.Log(test.name) - accessor := buildTidbClusterComponentAccessor(test.cluster, test.component) + tc := &TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Spec: *test.cluster, + } + + accessor := buildTidbClusterComponentAccessor(ComponentTiDB, tc, test.component) test.expectFn(g, accessor) } affinity := &corev1.Affinity{ diff --git a/pkg/apis/pingcap/v1alpha1/types.go b/pkg/apis/pingcap/v1alpha1/types.go index ba476457ed..5bd137827c 100644 --- a/pkg/apis/pingcap/v1alpha1/types.go +++ b/pkg/apis/pingcap/v1alpha1/types.go @@ -270,6 +270,15 @@ type TidbClusterSpec struct { // PodSecurityContext of the component // +optional PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"` + + // TopologySpreadConstraints describes how a group of pods ought to spread across topology + // domains. Scheduler will schedule pods in a way which abides by the constraints. + // This field is is only honored by clusters that enables the EvenPodsSpread feature. + // All topologySpreadConstraints are ANDed. + // +optional + // +listType=map + // +listMapKey=topologyKey + TopologySpreadConstraints []TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"` } // TidbClusterStatus represents the current status of a tidb cluster. @@ -880,6 +889,15 @@ type ComponentSpec struct { // Template. // +optional StatefulSetUpdateStrategy apps.StatefulSetUpdateStrategyType `json:"statefulSetUpdateStrategy,omitempty"` + + // TopologySpreadConstraints describes how a group of pods ought to spread across topology + // domains. Scheduler will schedule pods in a way which abides by the constraints. + // This field is is only honored by clusters that enables the EvenPodsSpread feature. + // All topologySpreadConstraints are ANDed. 
+ // +optional + // +listType=map + // +listMapKey=topologyKey + TopologySpreadConstraints []TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"` } // ServiceSpec specifies the service object in k8s @@ -1807,6 +1825,15 @@ type DMClusterSpec struct { // PodSecurityContext of the component // +optional PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"` + + // TopologySpreadConstraints describes how a group of pods ought to spread across topology + // domains. Scheduler will schedule pods in a way which abides by the constraints. + // This field is is only honored by clusters that enables the EvenPodsSpread feature. + // All topologySpreadConstraints are ANDed. + // +optional + // +listType=map + // +listMapKey=topologyKey + TopologySpreadConstraints []TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"` } // DMClusterStatus represents the current status of a dm cluster. @@ -2032,3 +2059,18 @@ type StorageVolume struct { StorageSize string `json:"storageSize"` MountPath string `json:"mountPath"` } + +// TopologySpreadConstraint specifies how to spread matching pods among the given topology. +// It is a minimal version of corev1.TopologySpreadConstraint to avoid to add too many fields of API +// Refer to https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints +type TopologySpreadConstraint struct { + // TopologyKey is the key of node labels. Nodes that have a label with this key + // and identical values are considered to be in the same topology. + // We consider each as a "bucket", and try to put balanced number + // of pods into each bucket. + // MaxSkew is default set to 1 + // WhenUnsatisfiable is default set to DoNotSchedule + // LabelSelector is generated by component type + // See pkg/apis/pingcap/v1alpha1/tidbcluster_component.go#TopologySpreadConstraints() + TopologyKey string `json:"topologyKey"` +} diff --git a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go index 98a8437ef9..0e79e0e218 100644 --- a/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go @@ -802,6 +802,11 @@ func (in *ComponentSpec) DeepCopyInto(out *ComponentSpec) { *out = new(int64) **out = **in } + if in.TopologySpreadConstraints != nil { + in, out := &in.TopologySpreadConstraints, &out.TopologySpreadConstraints + *out = make([]TopologySpreadConstraint, len(*in)) + copy(*out, *in) + } return } @@ -1077,6 +1082,11 @@ func (in *DMClusterSpec) DeepCopyInto(out *DMClusterSpec) { *out = new(v1.PodSecurityContext) (*in).DeepCopyInto(*out) } + if in.TopologySpreadConstraints != nil { + in, out := &in.TopologySpreadConstraints, &out.TopologySpreadConstraints + *out = make([]TopologySpreadConstraint, len(*in)) + copy(*out, *in) + } return } @@ -7983,6 +7993,11 @@ func (in *TidbClusterSpec) DeepCopyInto(out *TidbClusterSpec) { *out = new(v1.PodSecurityContext) (*in).DeepCopyInto(*out) } + if in.TopologySpreadConstraints != nil { + in, out := &in.TopologySpreadConstraints, &out.TopologySpreadConstraints + *out = make([]TopologySpreadConstraint, len(*in)) + copy(*out, *in) + } return } @@ -8433,6 +8448,22 @@ func (in *TikvAutoScalerStatus) DeepCopy() *TikvAutoScalerStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *TopologySpreadConstraint) DeepCopyInto(out *TopologySpreadConstraint) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TopologySpreadConstraint. +func (in *TopologySpreadConstraint) DeepCopy() *TopologySpreadConstraint { + if in == nil { + return nil + } + out := new(TopologySpreadConstraint) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *TxnLocalLatches) DeepCopyInto(out *TxnLocalLatches) { *out = *in diff --git a/pkg/manager/member/pump_member_manager.go b/pkg/manager/member/pump_member_manager.go index 4165c62290..1420cf5120 100644 --- a/pkg/manager/member/pump_member_manager.go +++ b/pkg/manager/member/pump_member_manager.go @@ -180,11 +180,7 @@ func (m *pumpMemberManager) syncHeadlessService(tc *v1alpha1.TidbCluster) error } func (m *pumpMemberManager) syncConfigMap(tc *v1alpha1.TidbCluster, set *appsv1.StatefulSet) (*corev1.ConfigMap, error) { - - basePumpSpec, createPump := tc.BasePumpSpec() - if !createPump { - return nil, nil - } + basePumpSpec := tc.BasePumpSpec() newCm, err := getNewPumpConfigMap(tc) if err != nil { @@ -232,9 +228,7 @@ func getNewPumpHeadlessService(tc *v1alpha1.TidbCluster) *corev1.Service { // getNewPumpConfigMap returns a configMap for pump func getNewPumpConfigMap(tc *v1alpha1.TidbCluster) (*corev1.ConfigMap, error) { - - _, createPump := tc.BasePumpSpec() - if !createPump { + if tc.Spec.Pump == nil { return nil, nil } spec := tc.Spec.Pump @@ -271,10 +265,7 @@ func getNewPumpConfigMap(tc *v1alpha1.TidbCluster) (*corev1.ConfigMap, error) { } func getNewPumpStatefulSet(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (*appsv1.StatefulSet, error) { - spec, ok := tc.BasePumpSpec() - if !ok { - return nil, nil - } + spec := tc.BasePumpSpec() objMeta, pumpLabel := getPumpMeta(tc, controller.PumpMemberName) replicas := tc.Spec.Pump.Replicas storageClass := tc.Spec.Pump.StorageClassName @@ -386,32 +377,26 @@ func getNewPumpStatefulSet(tc *v1alpha1.TidbCluster, cm *corev1.ConfigMap) (*app }, } + // TODO: set serviceAccountName in BuildPodSpec serviceAccountName := tc.Spec.Pump.ServiceAccount if serviceAccountName == "" { serviceAccountName = tc.Spec.ServiceAccount } + podSpec := spec.BuildPodSpec() + podSpec.Containers = containers + podSpec.Volumes = volumes + podSpec.ServiceAccountName = serviceAccountName + // TODO: change to set field in BuildPodSpec + podSpec.InitContainers = spec.InitContainers() + // TODO: change to set field in BuildPodSpec + podSpec.DNSPolicy = spec.DnsPolicy() - // TODO: use spec.BuildPodSpec() to generate pod spec podTemplate := corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Annotations: podAnnos, Labels: pumpLabel, }, - Spec: corev1.PodSpec{ - Containers: containers, - ServiceAccountName: serviceAccountName, - Volumes: volumes, - - Affinity: spec.Affinity(), - Tolerations: spec.Tolerations(), - NodeSelector: spec.NodeSelector(), - SchedulerName: spec.SchedulerName(), - SecurityContext: spec.PodSecurityContext(), - HostNetwork: spec.HostNetwork(), - DNSPolicy: spec.DnsPolicy(), - ImagePullSecrets: spec.ImagePullSecrets(), - InitContainers: spec.InitContainers(), - }, + Spec: podSpec, } return &appsv1.StatefulSet{ diff --git a/tests/dt.go b/tests/dt.go index c169db4d65..aac0c73793 100644 --- a/tests/dt.go +++ b/tests/dt.go @@ -15,7 +15,7 @@ package tests import ( "fmt" - "time" + "strconv" "github.com/pingcap/kvproto/pkg/metapb" 
"github.com/pingcap/kvproto/pkg/pdpb" @@ -24,13 +24,15 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/kubernetes/test/e2e/framework/log" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" ) const ( RackLabel = "rack" RackNum = 3 + + LabelKeyTestingZone = "testing.pingcap.com/zone" ) // RegionInfo records detail region info for api usage. @@ -62,24 +64,18 @@ func (oa *OperatorActions) LabelNodes() error { return err } - for i, node := range nodes.Items { - err := wait.PollImmediate(3*time.Second, time.Minute, func() (bool, error) { - n, err := oa.kubeCli.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{}) - if err != nil { - log.Logf("ERROR: get node:[%s] failed! error: %v", node.Name, err) - return false, nil - } - index := i % RackNum - n.Labels[RackLabel] = fmt.Sprintf("rack%d", index) - _, err = oa.kubeCli.CoreV1().Nodes().Update(n) - if err != nil { - log.Logf("ERROR: label node:[%s] failed! error: %v", node.Name, err) - return false, nil - } - return true, nil - }) - - if err != nil { + for i := range nodes.Items { + node := &nodes.Items[i] + zone := "zone-" + strconv.Itoa(i%2) + rack := "rack" + strconv.Itoa(i%RackNum) + patch := []byte(fmt.Sprintf( + `{"metadata":{"labels":{"%s":"%s","%s":"%s"}}}`, + LabelKeyTestingZone, + zone, + RackLabel, + rack, + )) + if _, err := oa.kubeCli.CoreV1().Nodes().Patch(node.Name, types.StrategicMergePatchType, patch); err != nil { return fmt.Errorf("label nodes failed, error: %v", err) } } @@ -93,6 +89,25 @@ func (oa *OperatorActions) LabelNodesOrDie() { } } +func CleanNodeLabels(c kubernetes.Interface) error { + nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{}) + if err != nil { + return err + } + for i := range nodeList.Items { + node := &nodeList.Items[i] + patch := []byte(fmt.Sprintf( + `{"metadata":{"labels":{"%s":null,"%s":null}}}`, + LabelKeyTestingZone, + RackLabel, + )) + if _, err = c.CoreV1().Nodes().Patch(node.Name, types.StrategicMergePatchType, patch); err != nil { + return err + } + } + return nil +} + func (oa *OperatorActions) CheckDisasterTolerance(cluster *TidbClusterConfig) error { pds, err := oa.kubeCli.CoreV1().Pods(cluster.Namespace).List( metav1.ListOptions{LabelSelector: labels.SelectorFromSet( diff --git a/tests/e2e/e2e.go b/tests/e2e/e2e.go index 427d55bd71..bda267a1c3 100644 --- a/tests/e2e/e2e.go +++ b/tests/e2e/e2e.go @@ -336,6 +336,12 @@ var _ = ginkgo.SynchronizedAfterSuite(func() { config.QPS = 20 config.Burst = 50 kubeCli, _ := kubernetes.NewForConfig(config) + if !ginkgo.CurrentGinkgoTestDescription().Failed { + ginkgo.By("Clean labels") + err := tests.CleanNodeLabels(kubeCli) + framework.ExpectNoError(err, "failed to clean labels") + } + ginkgo.By("Deleting cert-manager") err := tidbcluster.DeleteCertManager(kubeCli) framework.ExpectNoError(err, "failed to delete cert-manager") diff --git a/tests/e2e/tidbcluster/tidbcluster.go b/tests/e2e/tidbcluster/tidbcluster.go index 93da19a9a5..1a4fc65ff4 100644 --- a/tests/e2e/tidbcluster/tidbcluster.go +++ b/tests/e2e/tidbcluster/tidbcluster.go @@ -1607,7 +1607,7 @@ var _ = ginkgo.Describe("TiDBCluster", func() { framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s components ready", tc.Namespace, tc.Name) }) - ginkgo.Describe("All pods controlled by TidbCluster can set pod security context", func() { + ginkgo.Describe("[Feature]: PodSecurityContext", func() { ginkgo.It("TidbCluster global pod security context", 
func() { ginkgo.By("Deploy tidbCluster") userID := int64(1000) @@ -1638,8 +1638,104 @@ var _ = ginkgo.Describe("TiDBCluster", func() { } }) }) + + ginkgo.Describe("[Feature]: TopologySpreadConstraint", func() { + nodeZoneMap := map[string]string{} + + ginkgo.BeforeEach(func() { + nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{}) + framework.ExpectNoError(err, "failed to list nodes: %+v") + for i := range nodeList.Items { + node := &nodeList.Items[i] + zone, ok := node.Labels[tests.LabelKeyTestingZone] + framework.ExpectEqual(ok, true, "label %s should exist", tests.LabelKeyTestingZone) + nodeZoneMap[node.Name] = zone + } + }) + + ginkgo.It("TidbCluster global topology spread contraint", func() { + ginkgo.By("Deploy tidbCluster") + tc := fixture.GetTidbCluster(ns, "topology-test", utilimage.TiDBV4) + tc.Spec.TopologySpreadConstraints = []v1alpha1.TopologySpreadConstraint{ + { + TopologyKey: tests.LabelKeyTestingZone, + }, + } + // change to use default scheduler + tc.Spec.SchedulerName = "default-scheduler" + tc.Spec.PD.Replicas = 3 + tc.Spec.TiDB.Replicas = 2 + tc.Spec.TiKV.Replicas = 2 + + tc = fixture.AddTiFlashForTidbCluster(tc) + tc = fixture.AddTiCDCForTidbCluster(tc) + tc = fixture.AddPumpForTidbCluster(tc) + + tc.Spec.TiFlash.Replicas = 2 + tc.Spec.TiCDC.Replicas = 2 + tc.Spec.Pump.Replicas = 2 + + utiltc.MustCreateTCWithComponentsReady(genericCli, oa, tc, 5*time.Minute, 10*time.Second) + podList, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{}) + framework.ExpectNoError(err, "failed to list pods: %+v") + + err = validatePodSpread(podList.Items, nodeZoneMap, []string{label.PDLabelVal}, 1) + framework.ExpectNoError(err, "failed even spread pd pods: %v") + + err = validatePodSpread(podList.Items, nodeZoneMap, []string{ + label.TiDBLabelVal, + label.TiKVLabelVal, + label.TiFlashLabelVal, + label.TiCDCLabelVal, + label.PumpLabelVal, + }, 0) + framework.ExpectNoError(err, "failed even spread pods: %v") + }) + }) }) +func validatePodSpread(pods []corev1.Pod, nodeZoneMap map[string]string, componentLabels []string, maxSkew int) error { + zones := make([][2]int, len(componentLabels)) + for i := range pods { + pod := &pods[i] + framework.ExpectEqual(len(pod.Spec.TopologySpreadConstraints), 1, "Expected pod topology spread constraints are set") + + c, ok := pod.Labels[label.ComponentLabelKey] + if !ok { + continue + } + + zone, ok := nodeZoneMap[pod.Spec.NodeName] + if !ok { + return fmt.Errorf("node %s has no zone label", pod.Spec.NodeName) + } + + zoneId := 0 + switch zone { + case "zone-0": + zoneId = 0 + case "zone-1": + zoneId = 1 + } + + for i, componentLabel := range componentLabels { + if c == componentLabel { + zones[i][zoneId]++ + } + } + } + for i, componentLabel := range componentLabels { + skew := zones[i][0] - zones[i][1] + if skew < 0 { + skew = -skew + } + if skew > maxSkew { + return fmt.Errorf("%s pods are not even spread", componentLabel) + } + } + return nil +} + func newTidbClusterConfig(cfg *tests.Config, ns, clusterName, password, tcVersion string) tests.TidbClusterConfig { return tests.TidbClusterConfig{ Namespace: ns, From 2088b3076f36a295352704ef7090a5b5ef4033e9 Mon Sep 17 00:00:00 2001 From: Yilong Li Date: Tue, 27 Apr 2021 12:09:56 +0800 Subject: [PATCH 3/4] add e2e case for upgrade tc version (#3775) --- Makefile | 2 +- tests/actions.go | 198 ++++++------ tests/e2e/tidbcluster/stability.go | 2 +- tests/e2e/tidbcluster/tidbcluster.go | 369 +++++++++++++++++++++- tests/e2e/util/tidbcluster/tidbcluster.go | 43 ++- tests/pkg/fixture/fixture.go | 2 +- 
6 files changed, 504 insertions(+), 112 deletions(-) diff --git a/Makefile b/Makefile index 19a5e98016..afd4422fc9 100644 --- a/Makefile +++ b/Makefile @@ -162,7 +162,7 @@ endif ALL_CHECKS = EOF codegen boilerplate openapi-spec crd-groups spelling -check: $(addprefix check-,$(ALL_CHECKS)) lint tidy +check: $(addprefix check-,$(ALL_CHECKS)) lint tidy check-%: ./hack/verify-$*.sh diff --git a/tests/actions.go b/tests/actions.go index 3f4bb3d1be..835e814150 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -1402,15 +1402,18 @@ func getStsContainer(kubeCli kubernetes.Interface, sts *apps.StatefulSet, contai func (oa *OperatorActions) pdMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, error) { if tc.Spec.PD == nil { + log.Logf("no pd in tc spec, skip") return true, nil } tcName := tc.GetName() ns := tc.GetNamespace() pdSetName := controller.PDMemberName(tcName) + tcID := fmt.Sprintf("%s/%s", ns, tcName) + pdStsID := fmt.Sprintf("%s/%s", ns, pdSetName) pdSet, err := oa.tcStsGetter.StatefulSets(ns).Get(pdSetName, metav1.GetOptions{}) if err != nil { - log.Logf("failed to get statefulset: %s/%s, %v", ns, pdSetName, err) + log.Logf("failed to get StatefulSet: %q, %v", pdStsID, err) return false, nil } @@ -1424,78 +1427,75 @@ func (oa *OperatorActions) pdMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, err } if tc.Status.PD.StatefulSet == nil { - log.Logf("tidbcluster: %s/%s .status.PD.StatefulSet is nil", ns, tcName) + log.Logf("TidbCluster: %q .status.PD.StatefulSet is nil", tcID) return false, nil } failureCount := len(tc.Status.PD.FailureMembers) replicas := tc.Spec.PD.Replicas + int32(failureCount) if *pdSet.Spec.Replicas != replicas { - log.Logf("statefulset: %s/%s .spec.Replicas(%d) != %d", - ns, pdSetName, *pdSet.Spec.Replicas, replicas) + log.Logf("StatefulSet: %q .spec.Replicas(%d) != %d", pdStsID, *pdSet.Spec.Replicas, replicas) return false, nil } if pdSet.Status.ReadyReplicas != tc.Spec.PD.Replicas { - log.Logf("statefulset: %s/%s .status.ReadyReplicas(%d) != %d", - ns, pdSetName, pdSet.Status.ReadyReplicas, tc.Spec.PD.Replicas) + log.Logf("StatefulSet: %q .status.ReadyReplicas(%d) != %d", pdStsID, pdSet.Status.ReadyReplicas, tc.Spec.PD.Replicas) return false, nil } if len(tc.Status.PD.Members) != int(tc.Spec.PD.Replicas) { - log.Logf("tidbcluster: %s/%s .status.PD.Members count(%d) != %d", - ns, tcName, len(tc.Status.PD.Members), tc.Spec.PD.Replicas) + log.Logf("TidbCluster: %q .status.PD.Members count(%d) != %d", tcID, len(tc.Status.PD.Members), tc.Spec.PD.Replicas) return false, nil } if pdSet.Status.ReadyReplicas != pdSet.Status.Replicas { - log.Logf("statefulset: %s/%s .status.ReadyReplicas(%d) != .status.Replicas(%d)", - ns, pdSetName, pdSet.Status.ReadyReplicas, pdSet.Status.Replicas) + log.Logf("StatefulSet: %q .status.ReadyReplicas(%d) != .status.Replicas(%d)", pdStsID, pdSet.Status.ReadyReplicas, pdSet.Status.Replicas) return false, nil } c, found := getMemberContainer(oa.kubeCli, oa.tcStsGetter, ns, tc.Name, label.PDLabelVal) if !found { - log.Logf("statefulset: %s/%s not found containers[name=pd] or pod %s-0", - ns, pdSetName, pdSetName) + log.Logf("StatefulSet: %q not found containers[name=pd] or pod %s-0", pdStsID, pdSetName) return false, nil } if tc.PDImage() != c.Image { - log.Logf("statefulset: %s/%s .spec.template.spec.containers[name=pd].image(%s) != %s", - ns, pdSetName, c.Image, tc.PDImage()) + log.Logf("StatefulSet: %q .spec.template.spec.containers[name=pd].image(%s) != %s", pdStsID, c.Image, tc.PDImage()) return false, nil } for _, member := range 
tc.Status.PD.Members { if !member.Health { - log.Logf("tidbcluster: %s/%s pd member(%s/%s) is not health", - ns, tcName, member.ID, member.Name) + log.Logf("TidbCluster: %q pd member(%s/%s) is not health", tcID, member.ID, member.Name) return false, nil } } pdServiceName := controller.PDMemberName(tcName) - pdPeerServiceName := controller.PDPeerMemberName(tcName) if _, err := oa.kubeCli.CoreV1().Services(ns).Get(pdServiceName, metav1.GetOptions{}); err != nil { log.Logf("failed to get service: %s/%s", ns, pdServiceName) return false, nil } + pdPeerServiceName := controller.PDPeerMemberName(tcName) if _, err := oa.kubeCli.CoreV1().Services(ns).Get(pdPeerServiceName, metav1.GetOptions{}); err != nil { log.Logf("failed to get peer service: %s/%s", ns, pdPeerServiceName) return false, nil } + log.Logf("pd members are ready for tc %q", tcID) return true, nil } func (oa *OperatorActions) tikvMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, error) { if tc.Spec.TiKV == nil { + log.Logf("no tikv in tc spec, skip") return true, nil } tcName := tc.GetName() ns := tc.GetNamespace() tikvSetName := controller.TiKVMemberName(tcName) + tcID := fmt.Sprintf("%s/%s", ns, tcName) + tikvStsID := fmt.Sprintf("%s/%s", ns, tikvSetName) tikvSet, err := oa.tcStsGetter.StatefulSets(ns).Get(tikvSetName, metav1.GetOptions{}) if err != nil { - log.Logf("failed to get statefulset: %s/%s, %v", ns, tikvSetName, err) + log.Logf("failed to get StatefulSet: %q, %v", tikvStsID, err) return false, nil } @@ -1509,48 +1509,42 @@ func (oa *OperatorActions) tikvMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e } if tc.Status.TiKV.StatefulSet == nil { - log.Logf("tidbcluster: %s/%s .status.TiKV.StatefulSet is nil", ns, tcName) + log.Logf("TidbCluster: %q .status.TiKV.StatefulSet is nil", tcID) return false, nil } failureCount := len(tc.Status.TiKV.FailureStores) replicas := tc.Spec.TiKV.Replicas + int32(failureCount) if *tikvSet.Spec.Replicas != replicas { - log.Logf("statefulset: %s/%s .spec.Replicas(%d) != %d", - ns, tikvSetName, *tikvSet.Spec.Replicas, replicas) + log.Logf("StatefulSet: %q .spec.Replicas(%d) != %d", tikvStsID, *tikvSet.Spec.Replicas, replicas) return false, nil } if tikvSet.Status.ReadyReplicas != replicas { - log.Logf("statefulset: %s/%s .status.ReadyReplicas(%d) != %d", - ns, tikvSetName, tikvSet.Status.ReadyReplicas, replicas) + log.Logf("StatefulSet: %q .status.ReadyReplicas(%d) != %d", tikvStsID, tikvSet.Status.ReadyReplicas, replicas) return false, nil } if len(tc.Status.TiKV.Stores) != int(replicas) { - log.Logf("tidbcluster: %s/%s .status.TiKV.Stores.count(%d) != %d", - ns, tcName, len(tc.Status.TiKV.Stores), replicas) + log.Logf("TidbCluster: %q .status.TiKV.Stores.count(%d) != %d", tcID, len(tc.Status.TiKV.Stores), replicas) return false, nil } if tikvSet.Status.ReadyReplicas != tikvSet.Status.Replicas { - log.Logf("statefulset: %s/%s .status.ReadyReplicas(%d) != .status.Replicas(%d)", - ns, tikvSetName, tikvSet.Status.ReadyReplicas, tikvSet.Status.Replicas) + log.Logf("StatefulSet: %q .status.ReadyReplicas(%d) != .status.Replicas(%d)", tikvStsID, tikvSet.Status.ReadyReplicas, tikvSet.Status.Replicas) return false, nil } c, found := getMemberContainer(oa.kubeCli, oa.tcStsGetter, ns, tc.Name, label.TiKVLabelVal) if !found { - log.Logf("statefulset: %s/%s not found containers[name=tikv] or pod %s-0", - ns, tikvSetName, tikvSetName) + log.Logf("StatefulSet: %q not found containers[name=tikv] or pod %s-0", tikvStsID, tikvSetName) return false, nil } if tc.TiKVImage() != c.Image { - log.Logf("statefulset: 
%s/%s .spec.template.spec.containers[name=tikv].image(%s) != %s", - ns, tikvSetName, c.Image, tc.TiKVImage()) + log.Logf("StatefulSet: %q .spec.template.spec.containers[name=tikv].image(%s) != %s", tikvStsID, c.Image, tc.TiKVImage()) return false, nil } for _, store := range tc.Status.TiKV.Stores { if store.State != v1alpha1.TiKVStateUp { - log.Logf("tidbcluster: %s/%s's store(%s) state != %s", ns, tcName, store.ID, v1alpha1.TiKVStateUp) + log.Logf("TidbCluster: %q .status.TiKV store(%s) state != %s", tcID, store.ID, v1alpha1.TiKVStateUp) return false, nil } } @@ -1561,17 +1555,24 @@ func (oa *OperatorActions) tikvMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e return false, nil } + log.Logf("tikv members are ready for tc %q", tcID) return true, nil } func (oa *OperatorActions) tiflashMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, error) { + if tc.Spec.TiFlash == nil { + log.Logf("no tiflash in tc spec, skip") + return true, nil + } tcName := tc.GetName() ns := tc.GetNamespace() tiflashSetName := controller.TiFlashMemberName(tcName) + tcID := fmt.Sprintf("%s/%s", ns, tcName) + tiflashStsID := fmt.Sprintf("%s/%s", ns, tiflashSetName) tiflashSet, err := oa.tcStsGetter.StatefulSets(ns).Get(tiflashSetName, metav1.GetOptions{}) if err != nil { - log.Logf("TiFlash failed to get statefulset: %s/%s, %v", ns, tiflashSetName, err) + log.Logf("failed to get StatefulSet: %q, %v", tiflashStsID, err) return false, nil } @@ -1585,48 +1586,42 @@ func (oa *OperatorActions) tiflashMembersReadyFn(tc *v1alpha1.TidbCluster) (bool } if tc.Status.TiFlash.StatefulSet == nil { - log.Logf("tidbcluster: %s/%s .status.TiFlash.StatefulSet is nil", ns, tcName) + log.Logf("TidbCluster: %q .status.TiFlash.StatefulSet is nil", tcID) return false, nil } failureCount := len(tc.Status.TiFlash.FailureStores) replicas := tc.Spec.TiFlash.Replicas + int32(failureCount) if *tiflashSet.Spec.Replicas != replicas { - log.Logf("TiFlash statefulset: %s/%s .spec.Replicas(%d) != %d", - ns, tiflashSetName, *tiflashSet.Spec.Replicas, replicas) + log.Logf("TiFlash StatefulSet: %q .spec.Replicas(%d) != %d", tiflashStsID, *tiflashSet.Spec.Replicas, replicas) return false, nil } if tiflashSet.Status.ReadyReplicas != replicas { - log.Logf("TiFlash statefulset: %s/%s .status.ReadyReplicas(%d) != %d", - ns, tiflashSetName, tiflashSet.Status.ReadyReplicas, replicas) + log.Logf("TiFlash StatefulSet: %q .status.ReadyReplicas(%d) != %d", tiflashStsID, tiflashSet.Status.ReadyReplicas, replicas) return false, nil } if len(tc.Status.TiFlash.Stores) != int(replicas) { - log.Logf("tidbcluster: %s/%s .status.TiFlash.Stores.count(%d) != %d", - ns, tcName, len(tc.Status.TiFlash.Stores), replicas) + log.Logf("TidbCluster: %q .status.TiFlash.Stores.count(%d) != %d", tcID, len(tc.Status.TiFlash.Stores), replicas) return false, nil } if tiflashSet.Status.ReadyReplicas != tiflashSet.Status.Replicas { - log.Logf("statefulset: %s/%s .status.ReadyReplicas(%d) != .status.Replicas(%d)", - ns, tiflashSetName, tiflashSet.Status.ReadyReplicas, tiflashSet.Status.Replicas) + log.Logf("StatefulSet: %q .status.ReadyReplicas(%d) != .status.Replicas(%d)", tiflashStsID, tiflashSet.Status.ReadyReplicas, tiflashSet.Status.Replicas) return false, nil } c, found := getMemberContainer(oa.kubeCli, oa.tcStsGetter, ns, tc.Name, label.TiFlashLabelVal) if !found { - log.Logf("statefulset: %s/%s not found containers[name=tiflash] or pod %s-0", - ns, tiflashSetName, tiflashSetName) + log.Logf("StatefulSet: %q not found containers[name=tiflash] or pod %s-0", tiflashStsID, 
tiflashSetName) return false, nil } if tc.TiFlashImage() != c.Image { - log.Logf("statefulset: %s/%s .spec.template.spec.containers[name=tiflash].image(%s) != %s", - ns, tiflashSetName, c.Image, tc.TiFlashImage()) + log.Logf("StatefulSet: %q .spec.template.spec.containers[name=tiflash].image(%s) != %s", tiflashStsID, c.Image, tc.TiFlashImage()) return false, nil } for _, store := range tc.Status.TiFlash.Stores { if store.State != v1alpha1.TiKVStateUp { - log.Logf("TiFlash tidbcluster: %s/%s's store(%s) state != %s", ns, tcName, store.ID, v1alpha1.TiKVStateUp) + log.Logf("TiFlash TidbCluster: %q's store(%s) state != %s", tcID, store.ID, v1alpha1.TiKVStateUp) return false, nil } } @@ -1636,21 +1631,25 @@ func (oa *OperatorActions) tiflashMembersReadyFn(tc *v1alpha1.TidbCluster) (bool log.Logf("failed to get peer service: %s/%s", ns, tiflashPeerServiceName) return false, nil } - log.Logf("TiFlash ready: %s/%s", ns, tcName) + + log.Logf("tiflash members are ready for tc %q", tcID) return true, nil } func (oa *OperatorActions) tidbMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, error) { if tc.Spec.TiDB == nil { + log.Logf("no tidb in tc spec, skip") return true, nil } tcName := tc.GetName() ns := tc.GetNamespace() tidbSetName := controller.TiDBMemberName(tcName) + tcID := fmt.Sprintf("%s/%s", ns, tcName) + tidbStsID := fmt.Sprintf("%s/%s", ns, tidbSetName) tidbSet, err := oa.tcStsGetter.StatefulSets(ns).Get(tidbSetName, metav1.GetOptions{}) if err != nil { - log.Logf("failed to get statefulset: %s/%s, %v", ns, tidbSetName, err) + log.Logf("failed to get StatefulSet: %q, %v", tidbStsID, err) return false, nil } @@ -1664,56 +1663,51 @@ func (oa *OperatorActions) tidbMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, e } if tc.Status.TiDB.StatefulSet == nil { - log.Logf("tidbcluster: %s/%s .status.TiDB.StatefulSet is nil", ns, tcName) + log.Logf("TidbCluster: %q .status.TiDB.StatefulSet is nil", tcID) return false, nil } failureCount := len(tc.Status.TiDB.FailureMembers) replicas := tc.Spec.TiDB.Replicas + int32(failureCount) if *tidbSet.Spec.Replicas != replicas { - log.Logf("statefulset: %s/%s .spec.Replicas(%d) != %d", - ns, tidbSetName, *tidbSet.Spec.Replicas, replicas) + log.Logf("StatefulSet: %q .spec.Replicas(%d) != %d", tidbStsID, *tidbSet.Spec.Replicas, replicas) return false, nil } if tidbSet.Status.ReadyReplicas != tc.Spec.TiDB.Replicas { - log.Logf("statefulset: %s/%s .status.ReadyReplicas(%d) != %d", - ns, tidbSetName, tidbSet.Status.ReadyReplicas, tc.Spec.TiDB.Replicas) + log.Logf("StatefulSet: %q .status.ReadyReplicas(%d) != %d", tidbStsID, tidbSet.Status.ReadyReplicas, tc.Spec.TiDB.Replicas) return false, nil } if len(tc.Status.TiDB.Members) != int(tc.Spec.TiDB.Replicas) { - log.Logf("tidbcluster: %s/%s .status.TiDB.Members count(%d) != %d", - ns, tcName, len(tc.Status.TiDB.Members), tc.Spec.TiDB.Replicas) + log.Logf("TidbCluster: %q .status.TiDB.Members count(%d) != %d", tcID, len(tc.Status.TiDB.Members), tc.Spec.TiDB.Replicas) return false, nil } if tidbSet.Status.ReadyReplicas != tidbSet.Status.Replicas { - log.Logf("statefulset: %s/%s .status.ReadyReplicas(%d) != .status.Replicas(%d)", - ns, tidbSetName, tidbSet.Status.ReadyReplicas, tidbSet.Status.Replicas) + log.Logf("StatefulSet: %q .status.ReadyReplicas(%d) != .status.Replicas(%d)", tidbStsID, tidbSet.Status.ReadyReplicas, tidbSet.Status.Replicas) return false, nil } c, found := getMemberContainer(oa.kubeCli, oa.tcStsGetter, ns, tc.Name, label.TiDBLabelVal) if !found { - log.Logf("statefulset: %s/%s not found 
containers[name=tidb] or pod %s-0", - ns, tidbSetName, tidbSetName) + log.Logf("StatefulSet: %q not found containers[name=tidb] or pod %s-0", tidbStsID, tidbSetName) return false, nil } if tc.TiDBImage() != c.Image { - log.Logf("statefulset: %s/%s .spec.template.spec.containers[name=tidb].image(%s) != %s", - ns, tidbSetName, c.Image, tc.TiDBImage()) + log.Logf("StatefulSet: %q .spec.template.spec.containers[name=tidb].image(%s) != %s", tidbStsID, c.Image, tc.TiDBImage()) return false, nil } - _, err = oa.kubeCli.CoreV1().Services(ns).Get(tidbSetName, metav1.GetOptions{}) - if err != nil { - log.Logf("failed to get service: %s/%s", ns, tidbSetName) + tidbServiceName := controller.TiDBMemberName(tcName) + if _, err = oa.kubeCli.CoreV1().Services(ns).Get(tidbServiceName, metav1.GetOptions{}); err != nil { + log.Logf("failed to get service: %s/%s", ns, tidbServiceName) return false, nil } - _, err = oa.kubeCli.CoreV1().Services(ns).Get(controller.TiDBPeerMemberName(tcName), metav1.GetOptions{}) - if err != nil { - log.Logf("failed to get peer service: %s/%s", ns, controller.TiDBPeerMemberName(tcName)) + tidbPeerServiceName := controller.TiDBPeerMemberName(tcName) + if _, err = oa.kubeCli.CoreV1().Services(ns).Get(tidbPeerServiceName, metav1.GetOptions{}); err != nil { + log.Logf("failed to get peer service: %s/%s", ns, tidbPeerServiceName) return false, nil } + log.Logf("tidb members are ready for tc %q", tcID) return true, nil } @@ -3007,7 +3001,7 @@ func (oa *OperatorActions) CheckIncrementalBackup(info *TidbClusterConfig, withD isv1 := info.OperatorTag == "v1.0.0" for _, pod := range pods.Items { - if !oa.pumpHealth(info.ClusterName, info.Namespace, pod.Name, false) { + if !oa.pumpIsHealthy(info.ClusterName, info.Namespace, pod.Name, false) { log.Logf("some pods is not health %s", pumpStatefulSetName) return false, nil } @@ -3177,7 +3171,7 @@ type nodeStatus struct { State string `json:"state"` } -func (oa *OperatorActions) pumpHealth(tcName, ns, podName string, tlsEnabled bool) bool { +func (oa *OperatorActions) pumpIsHealthy(tcName, ns, podName string, tlsEnabled bool) bool { var err error var addr string if oa.fw != nil { @@ -3541,37 +3535,46 @@ func (oa *OperatorActions) CheckInitSQLOrDie(info *TidbClusterConfig) { } func (oa *OperatorActions) pumpMembersReadyFn(tc *v1alpha1.TidbCluster) (bool, error) { + if tc.Spec.Pump == nil { + log.Logf("no pump in tc spec, skip") + return true, nil + } tcName := tc.GetName() ns := tc.GetNamespace() - ssName := controller.PumpMemberName(tcName) + pumpSetName := controller.PumpMemberName(tcName) + tcID := fmt.Sprintf("%s/%s", ns, tcName) + pumpStsID := fmt.Sprintf("%s/%s", ns, pumpSetName) - ss, err := oa.tcStsGetter.StatefulSets(ns).Get(ssName, metav1.GetOptions{}) + pumpSet, err := oa.tcStsGetter.StatefulSets(ns).Get(pumpSetName, metav1.GetOptions{}) if err != nil { - log.Logf("failed to get statefulset: %s/%s, %v", ns, ssName, err) + log.Logf("failed to get StatefulSet: %q, %v", pumpStsID, err) return false, nil } - if ss.Status.CurrentRevision != ss.Status.UpdateRevision { - log.Logf("pump sts .Status.CurrentRevision (%s) != .Status.UpdateRevision (%s)", ss.Status.CurrentRevision, ss.Status.UpdateRevision) + if pumpSet.Status.CurrentRevision != pumpSet.Status.UpdateRevision { + log.Logf("pump sts .Status.CurrentRevision (%s) != .Status.UpdateRevision (%s)", pumpSet.Status.CurrentRevision, pumpSet.Status.UpdateRevision) return false, nil } - if !utilstatefulset.IsAllDesiredPodsRunningAndReady(helper.NewHijackClient(oa.kubeCli, oa.asCli), ss) { + 
if !utilstatefulset.IsAllDesiredPodsRunningAndReady(helper.NewHijackClient(oa.kubeCli, oa.asCli), pumpSet) { return false, nil } // check all pump replicas are online - for i := 0; i < int(*ss.Spec.Replicas); i++ { - podName := fmt.Sprintf("%s-%d", ssName, i) - if !oa.pumpHealth(tc.Name, tc.Namespace, podName, tc.IsTLSClusterEnabled()) { + for i := 0; i < int(*pumpSet.Spec.Replicas); i++ { + podName := fmt.Sprintf("%s-%d", pumpSetName, i) + if !oa.pumpIsHealthy(tc.Name, tc.Namespace, podName, tc.IsTLSClusterEnabled()) { log.Logf("%s is not health yet", podName) return false, nil } } + + log.Logf("pump members are ready for tc %q", tcID) return true, nil } // FIXME: this duplicates with WaitForTidbClusterReady in crd_test_utils.go, and all functions in it +// TODO: sync with e2e doc func (oa *OperatorActions) WaitForTidbClusterReady(tc *v1alpha1.TidbCluster, timeout, pollInterval time.Duration) error { if tc == nil { return fmt.Errorf("tidbcluster is nil, cannot call WaitForTidbClusterReady") @@ -3579,50 +3582,39 @@ func (oa *OperatorActions) WaitForTidbClusterReady(tc *v1alpha1.TidbCluster, tim return wait.PollImmediate(pollInterval, timeout, func() (bool, error) { var local *v1alpha1.TidbCluster var err error + tcID := fmt.Sprintf("%s/%s", tc.Namespace, tc.Name) + if local, err = oa.cli.PingcapV1alpha1().TidbClusters(tc.Namespace).Get(tc.Name, metav1.GetOptions{}); err != nil { - log.Logf("failed to get tidbcluster: %s/%s, %v", tc.Namespace, tc.Name, err) + log.Logf("failed to get TidbCluster: %q, %v", tcID, err) return false, nil } if b, err := oa.pdMembersReadyFn(local); !b && err == nil { - log.Logf("pd members are not ready for tc %q", tc.Name) + log.Logf("pd members are not ready for tc %q", tcID) return false, nil } - log.Logf("pd members are ready for tc %q", tc.Name) if b, err := oa.tikvMembersReadyFn(local); !b && err == nil { - log.Logf("tikv members are not ready for tc %q", tc.Name) + log.Logf("tikv members are not ready for tc %q", tcID) return false, nil } - log.Logf("tikv members are ready for tc %q", tc.Name) if b, err := oa.tidbMembersReadyFn(local); !b && err == nil { - log.Logf("tidb members are not ready for tc %q", tc.Name) + log.Logf("tidb members are not ready for tc %q", tcID) return false, nil } - log.Logf("tidb members are ready for tc %q", tc.Name) - if tc.Spec.TiFlash != nil && tc.Spec.TiFlash.Replicas > int32(0) { - if b, err := oa.tiflashMembersReadyFn(local); !b && err == nil { - log.Logf("tiflash members are not ready for tc %q", tc.Name) - return false, nil - } - log.Logf("tiflash members are ready for tc %q", tc.Name) - } else { - log.Logf("no tiflash in tc spec") + if b, err := oa.tiflashMembersReadyFn(local); !b && err == nil { + log.Logf("tiflash members are not ready for tc %q", tcID) + return false, nil } - if tc.Spec.Pump != nil { - if b, err := oa.pumpMembersReadyFn(local); !b && err == nil { - log.Logf("pump members are not ready for tc %q", tc.Name) - return false, nil - } - log.Logf("pump members are ready for tc %q", tc.Name) - } else { - log.Logf("no pump in tc spec") + if b, err := oa.pumpMembersReadyFn(local); !b && err == nil { + log.Logf("pump members are not ready for tc %q", tcID) + return false, nil } - log.Logf("TidbCluster is ready") + log.Logf("TidbCluster %q is ready", tcID) return true, nil }) } diff --git a/tests/e2e/tidbcluster/stability.go b/tests/e2e/tidbcluster/stability.go index c7a5160c84..f1b22250d3 100644 --- a/tests/e2e/tidbcluster/stability.go +++ b/tests/e2e/tidbcluster/stability.go @@ -1082,7 +1082,7 @@ var _ = 
ginkgo.Describe("[Stability]", func() { framework.ExpectNoError(err, "failed to wait for the record of failed pod to be removed from failure stores") ginkgo.By("Waiting for the tidb cluster to become ready") - err = utiltidbcluster.WaitForTidbClusterReady(cli, tc.Namespace, tc.Name, time.Minute*30, 0) + err = utiltidbcluster.WaitForTidbClusterConditionReady(cli, tc.Namespace, tc.Name, time.Minute*30, 0) framework.ExpectNoError(err, "failed to wait for TidbCluster ready: %v", tc) }) }) diff --git a/tests/e2e/tidbcluster/tidbcluster.go b/tests/e2e/tidbcluster/tidbcluster.go index 1a4fc65ff4..bb199c35e1 100644 --- a/tests/e2e/tidbcluster/tidbcluster.go +++ b/tests/e2e/tidbcluster/tidbcluster.go @@ -26,7 +26,6 @@ import ( astsHelper "github.com/pingcap/advanced-statefulset/client/apis/apps/v1/helper" asclientset "github.com/pingcap/advanced-statefulset/client/client/clientset/versioned" corev1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" "k8s.io/apimachinery/pkg/api/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -316,7 +315,7 @@ var _ = ginkgo.Describe("TiDBCluster", func() { ginkgo.By("Check webhook is still running") webhookPod, err = c.CoreV1().Pods(webhookPod.Namespace).Get(webhookPod.Name, metav1.GetOptions{}) framework.ExpectNoError(err, "failed to get webhook pod %s/%s", webhookPod.Namespace, webhookPod.Name) - if webhookPod.Status.Phase != v1.PodRunning { + if webhookPod.Status.Phase != corev1.PodRunning { logs, err := pod.GetPodLogs(c, webhookPod.Namespace, webhookPod.Name, "webhook") framework.ExpectNoError(err, "failed to get pod log %s/%s", webhookPod.Namespace, webhookPod.Name) log.Logf("webhook logs: %s", logs) @@ -1574,6 +1573,366 @@ var _ = ginkgo.Describe("TiDBCluster", func() { framework.ExpectNoError(err, "failed to wait for TidbCluster ready: %q", tc.Name) }) + // test cases for tc upgrade + ginkgo.Context("upgrade should work correctly", func() { + ginkgo.It("for tc and components version", func() { + ginkgo.By("Deploy initial tc") + tc := fixture.GetTidbCluster(ns, "upgrade-version", utilimage.TiDBV4Prev) + pvRetain := corev1.PersistentVolumeReclaimRetain + tc.Spec.PVReclaimPolicy = &pvRetain + tc.Spec.PD.StorageClassName = pointer.StringPtr("local-storage") + tc.Spec.TiKV.StorageClassName = pointer.StringPtr("local-storage") + tc.Spec.TiDB.StorageClassName = pointer.StringPtr("local-storage") + utiltc.MustCreateTCWithComponentsReady(genericCli, oa, tc, 5*time.Minute, 10*time.Second) + + ginkgo.By("Update tc version") + err := controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.Version = utilimage.TiDBV4 + return nil + }) + framework.ExpectNoError(err, "failed to update tc version to %q", utilimage.TiDBV4) + err = oa.WaitForTidbClusterReady(tc, 10*time.Minute, 10*time.Second) + framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s components ready", ns, tc.Name) + + ginkgo.By("Update components version") + componentVersion := utilimage.TiDBV4Prev + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.PD.Version = pointer.StringPtr(componentVersion) + tc.Spec.TiKV.Version = pointer.StringPtr(componentVersion) + tc.Spec.TiDB.Version = pointer.StringPtr(componentVersion) + return nil + }) + framework.ExpectNoError(err, "failed to update components version to %q", componentVersion) + err = oa.WaitForTidbClusterReady(tc, 10*time.Minute, 10*time.Second) + framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s 
components ready", ns, tc.Name) + + ginkgo.By("Check components version") + pdMemberName := controller.PDMemberName(tc.Name) + pdSts, err := stsGetter.StatefulSets(ns).Get(pdMemberName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get StatefulSet %s/%s", ns, pdMemberName) + pdImage := fmt.Sprintf("pingcap/pd:%s", componentVersion) + framework.ExpectEqual(pdSts.Spec.Template.Spec.Containers[0].Image, pdImage, "pd sts image should be %q", pdImage) + + tikvMemberName := controller.TiKVMemberName(tc.Name) + tikvSts, err := stsGetter.StatefulSets(ns).Get(tikvMemberName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get StatefulSet %s/%s", ns, tikvMemberName) + tikvImage := fmt.Sprintf("pingcap/tikv:%s", componentVersion) + framework.ExpectEqual(tikvSts.Spec.Template.Spec.Containers[0].Image, tikvImage, "tikv sts image should be %q", tikvImage) + + tidbMemberName := controller.TiDBMemberName(tc.Name) + tidbSts, err := stsGetter.StatefulSets(ns).Get(tidbMemberName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get StatefulSet %s/%s", ns, tidbMemberName) + tidbImage := fmt.Sprintf("pingcap/tidb:%s", componentVersion) + // the 0th container for tidb pod is slowlog, which runs busybox + framework.ExpectEqual(tidbSts.Spec.Template.Spec.Containers[1].Image, tidbImage, "tidb sts image should be %q", tidbImage) + }) + + ginkgo.It("for configuration update", func() { + ginkgo.By("Deploy initial tc") + tc := fixture.GetTidbCluster(ns, "update-config", utilimage.TiDBV4) + utiltc.MustCreateTCWithComponentsReady(genericCli, oa, tc, 5*time.Minute, 10*time.Second) + + ginkgo.By("Update components configuration") + err := controller.GuaranteedUpdate(genericCli, tc, func() error { + pdCfg := v1alpha1.NewPDConfig() + tikvCfg := v1alpha1.NewTiKVConfig() + tidbCfg := v1alpha1.NewTiDBConfig() + pdCfg.Set("lease", 3) + tikvCfg.Set("status-thread-pool-size", 1) + tidbCfg.Set("token-limit", 10000) + tc.Spec.PD.Config = pdCfg + tc.Spec.TiKV.Config = tikvCfg + tc.Spec.TiDB.Config = tidbCfg + return nil + }) + framework.ExpectNoError(err, "failed to update components configuration") + + ginkgo.By("Wait for PD to be in UpgradePhase") + utiltc.MustWaitForPDPhase(cli, tc, v1alpha1.UpgradePhase, 3*time.Minute, 10*time.Second) + log.Logf("PD is in UpgradePhase") + + ginkgo.By("Wait for tc ready") + err = oa.WaitForTidbClusterReady(tc, 10*time.Minute, 10*time.Second) + framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s components ready", ns, tc.Name) + + ginkgo.By("Check PD configuration") + pdMemberName := controller.PDMemberName(tc.Name) + pdSts, err := stsGetter.StatefulSets(ns).Get(pdMemberName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get StatefulSet %s/%s", ns, pdMemberName) + pdCmName := member.FindConfigMapVolume(&pdSts.Spec.Template.Spec, func(name string) bool { + return strings.HasPrefix(name, controller.PDMemberName(tc.Name)) + }) + pdCm, err := c.CoreV1().ConfigMaps(ns).Get(pdCmName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get ConfigMap %s/%s", ns, pdCm) + log.Logf("PD config:\n%s", pdCm.Data["config-file"]) + gomega.Expect(pdCm.Data["config-file"]).To(gomega.ContainSubstring("lease = 3")) + + ginkgo.By("Wait for TiKV to be in UpgradePhase") + utiltc.MustWaitForTiKVPhase(cli, tc, v1alpha1.UpgradePhase, 3*time.Minute, 10*time.Second) + log.Logf("TiKV is in UpgradePhase") + + ginkgo.By("Wait for tc ready") + err = oa.WaitForTidbClusterReady(tc, 10*time.Minute, 10*time.Second) + framework.ExpectNoError(err, 
"failed to wait for TidbCluster %s/%s components ready", ns, tc.Name) + + ginkgo.By("Check TiKV configuration") + tikvMemberName := controller.TiKVMemberName(tc.Name) + tikvSts, err := stsGetter.StatefulSets(ns).Get(tikvMemberName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get StatefulSet %s/%s", ns, tikvMemberName) + tikvCmName := member.FindConfigMapVolume(&tikvSts.Spec.Template.Spec, func(name string) bool { + return strings.HasPrefix(name, controller.TiKVMemberName(tc.Name)) + }) + tikvCm, err := c.CoreV1().ConfigMaps(ns).Get(tikvCmName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get ConfigMap %s/%s", ns, tikvCmName) + log.Logf("TiKV config:\n%s", tikvCm.Data["config-file"]) + gomega.Expect(tikvCm.Data["config-file"]).To(gomega.ContainSubstring("status-thread-pool-size = 1")) + + ginkgo.By("Wait for TiDB to be in UpgradePhase") + utiltc.MustWaitForTiDBPhase(cli, tc, v1alpha1.UpgradePhase, 3*time.Minute, 10*time.Second) + log.Logf("TiDB is in UpgradePhase") + + ginkgo.By("Wait for tc ready") + err = oa.WaitForTidbClusterReady(tc, 10*time.Minute, 10*time.Second) + framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s components ready", ns, tc.Name) + + ginkgo.By("Check TiDB configuration") + tidbMemberName := controller.TiDBMemberName(tc.Name) + tidbSts, err := stsGetter.StatefulSets(ns).Get(tidbMemberName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get StatefulSet %s/%s", ns, tidbMemberName) + tidbCmName := member.FindConfigMapVolume(&tidbSts.Spec.Template.Spec, func(name string) bool { + return strings.HasPrefix(name, controller.TiDBMemberName(tc.Name)) + }) + tidbCm, err := c.CoreV1().ConfigMaps(ns).Get(tidbCmName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get ConfigMap %s/%s", ns, tidbCmName) + log.Logf("TiDB config:\n%s", tidbCm.Data["config-file"]) + gomega.Expect(tidbCm.Data["config-file"]).To(gomega.ContainSubstring("token-limit = 10000")) + }) + + // this case merge scale-in/scale-out into one case, may seems a little bit dense + // when scale-in, replica is first set to 5 and changed to 3 + // when scale-out, replica is first set to 3 and changed to 5 + ginkgo.Context("while concurrently scale PD", func() { + operation := []string{"in", "out"} + for _, op := range operation { + ginkgo.It(op, func() { + ginkgo.By("Deploy initial tc") + tcName := fmt.Sprintf("scale-%s-pd-concurrently", op) + tc := fixture.GetTidbCluster(ns, tcName, utilimage.TiDBV4Prev) + tc.Spec.PD.StorageClassName = pointer.StringPtr("local-storage") + if op == "in" { + tc.Spec.PD.Replicas = 5 + } else { + tc.Spec.PD.Replicas = 3 + } + utiltc.MustCreateTCWithComponentsReady(genericCli, oa, tc, 10*time.Minute, 10*time.Second) + + ginkgo.By("Upgrade PD version") + err := controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.PD.Version = pointer.StringPtr(utilimage.TiDBV4) + return nil + }) + framework.ExpectNoError(err, "failed to update PD version to %q", utilimage.TiDBV4) + + ginkgo.By(fmt.Sprintf("Wait for PD phase is %q", v1alpha1.UpgradePhase)) + err = wait.PollImmediate(10*time.Second, 3*time.Minute, func() (bool, error) { + tc, err := cli.PingcapV1alpha1().TidbClusters(ns).Get(tcName, metav1.GetOptions{}) + if err != nil { + log.Logf("failed to get TidbCluster %s/%s: %v", ns, tcName, err) + return false, nil + } + if tc.Status.PD.Phase != v1alpha1.UpgradePhase { + log.Logf("tc.Status.PD.Phase = %q, not %q yet", tc.Status.PD.Phase, v1alpha1.UpgradePhase) + return false, nil + } + return true, nil + 
}) + framework.ExpectNoError(err, "failed to wait for PD phase") + + ginkgo.By(fmt.Sprintf("Scale %s PD while in %q phase", op, v1alpha1.UpgradePhase)) + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + if op == "in" { + tc.Spec.PD.Replicas = 3 + } else { + tc.Spec.PD.Replicas = 5 + } + return nil + }) + framework.ExpectNoError(err, "failed to scale %s PD", op) + err = oa.WaitForTidbClusterReady(tc, 10*time.Minute, 10*time.Second) + framework.ExpectNoError(err, "failed to wait for TidbCluster ready: %s/%s", ns, tc.Name) + + ginkgo.By("Check PD replicas") + tc, err = cli.PingcapV1alpha1().TidbClusters(ns).Get(tcName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get TidbCluster %s/%s: %v", ns, tcName, err) + if op == "in" { + framework.ExpectEqual(int(tc.Spec.PD.Replicas), 3) + } else { + framework.ExpectEqual(int(tc.Spec.PD.Replicas), 5) + } + }) + } + }) + + // similar to PD scale-in/scale-out case above, need to check no evict leader scheduler left + ginkgo.Context("while concurrently scale TiKV", func() { + operation := []string{"in", "out"} + for _, op := range operation { + ginkgo.It(op, func() { + ginkgo.By("Deploy initial tc") + tcName := fmt.Sprintf("scale-%s-tikv-concurrently", op) + tc := fixture.GetTidbCluster(ns, tcName, utilimage.TiDBV4Prev) + if op == "in" { + tc.Spec.TiKV.Replicas = 4 + } else { + tc.Spec.TiKV.Replicas = 3 + } + utiltc.MustCreateTCWithComponentsReady(genericCli, oa, tc, 10*time.Minute, 10*time.Second) + + ginkgo.By("Upgrade TiKV version") + err := controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.TiKV.Version = pointer.StringPtr(utilimage.TiDBV4) + return nil + }) + framework.ExpectNoError(err, "failed to update TiKV version to %q", utilimage.TiDBV4) + + ginkgo.By(fmt.Sprintf("Wait for TiKV phase is %q", v1alpha1.UpgradePhase)) + err = wait.PollImmediate(10*time.Second, 3*time.Minute, func() (bool, error) { + tc, err := cli.PingcapV1alpha1().TidbClusters(ns).Get(tcName, metav1.GetOptions{}) + if err != nil { + log.Logf("failed to get TidbCluster %s/%s: %v", ns, tcName, err) + return false, nil + } + if tc.Status.TiKV.Phase != v1alpha1.UpgradePhase { + log.Logf("tc.Status.TiKV.Phase = %q, not %q yet", tc.Status.TiKV.Phase, v1alpha1.UpgradePhase) + return false, nil + } + return true, nil + }) + framework.ExpectNoError(err, "failed to wait for TiKV phase") + + ginkgo.By(fmt.Sprintf("Scale %s TiKV while in %q phase", op, v1alpha1.UpgradePhase)) + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + if op == "in" { + tc.Spec.TiKV.Replicas = 3 + } else { + tc.Spec.TiKV.Replicas = 4 + } + return nil + }) + framework.ExpectNoError(err, "failed to scale %s TiKV", op) + + ginkgo.By("Wait for TiKV to be in ScalePhase") + utiltc.MustWaitForTiKVPhase(cli, tc, v1alpha1.ScalePhase, 3*time.Minute, 10*time.Second) + log.Logf("TiKV is in ScalePhase") + + ginkgo.By("Wait for tc ready") + err = oa.WaitForTidbClusterReady(tc, 10*time.Minute, 10*time.Second) + framework.ExpectNoError(err, "failed to wait for TidbCluster ready: %s/%s", ns, tc.Name) + + ginkgo.By("Check TiKV replicas") + tc, err = cli.PingcapV1alpha1().TidbClusters(ns).Get(tcName, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get TidbCluster %s/%s: %v", ns, tcName, err) + if op == "in" { + framework.ExpectEqual(int(tc.Spec.TiKV.Replicas), 3) + } else { + framework.ExpectEqual(int(tc.Spec.TiKV.Replicas), 4) + } + log.Logf("TiKV replicas number is correct") + + ginkgo.By("Check no evict leader scheduler left") + pdClient, cancel, 
err := proxiedpdclient.NewProxiedPDClient(c, fw, ns, tc.Name, false) + framework.ExpectNoError(err, "create pdClient error") + defer cancel() + err = wait.Poll(5*time.Second, 3*time.Minute, func() (bool, error) { + schedulers, err := pdClient.GetEvictLeaderSchedulers() + framework.ExpectNoError(err, "failed to get evict leader schedulers") + if len(schedulers) != 0 { + log.Logf("there are %d evict leader left, expect 0", len(schedulers)) + return false, nil + } + return true, nil + }) + framework.ExpectNoError(err, "failed to wait for evict leader schedulers to become 0") + }) + } + }) + + ginkgo.It("with bad PD config, then recover after force upgrading PD", func() { + ginkgo.By("Deploy initial tc with incorrect PD image") + tc := fixture.GetTidbCluster(ns, "force-upgrade-pd", utilimage.TiDBV4Prev) + tc.Spec.PD.BaseImage = "wrong-pd-image" + err := genericCli.Create(context.TODO(), tc) + framework.ExpectNoError(err, "failed to create TidbCluster %s/%s", ns, tc.Name) + + ginkgo.By("Wait for 1 min and ensure no healthy PD Pod exist") + err = wait.PollImmediate(5*time.Second, 1*time.Minute, func() (bool, error) { + listOptions := metav1.ListOptions{ + LabelSelector: labels.SelectorFromSet(label.New().Instance(tc.Name).Component(label.PDLabelVal).Labels()).String(), + } + pods, err := c.CoreV1().Pods(ns).List(listOptions) + framework.ExpectNoError(err, "failed to list Pods with selector %+v", listOptions) + for _, pod := range pods.Items { + framework.ExpectNotEqual(pod.Status.Phase, corev1.PodRunning, "expect PD Pod %s/%s not to be running", ns, pod.Name) + } + return false, nil + }) + framework.ExpectEqual(err, wait.ErrWaitTimeout, "no Pod should be found for PD") + + ginkgo.By("Update PD Pod to correct image") + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.PD.BaseImage = "pingcap/pd" + return nil + }) + framework.ExpectNoError(err, "failed to change PD Pod image") + + ginkgo.By("Wait for 1 min and ensure no healthy PD Pod exist") + err = wait.PollImmediate(5*time.Second, 1*time.Minute, func() (bool, error) { + listOptions := metav1.ListOptions{ + LabelSelector: labels.SelectorFromSet(label.New().Instance(tc.Name).Component(label.PDLabelVal).Labels()).String(), + } + pods, err := c.CoreV1().Pods(ns).List(listOptions) + framework.ExpectNoError(err, "failed to list Pods with selector %+v", listOptions) + for _, pod := range pods.Items { + framework.ExpectNotEqual(pod.Status.Phase, corev1.PodRunning, "expect PD Pod %s/%s not to be running", ns, pod.Name) + } + return false, nil + }) + framework.ExpectEqual(err, wait.ErrWaitTimeout, "no Pod should be found for PD") + + ginkgo.By("Annotate TidbCluster for force upgrade") + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + if tc.Annotations == nil { + tc.Annotations = make(map[string]string) + } + tc.Annotations["tidb.pingcap.com/force-upgrade"] = "true" + tc.Spec.PD.BaseImage = "wrong" // we need to make this right later + return nil + }) + framework.ExpectNoError(err, "failed to annotate tc for force upgrade") + err = wait.PollImmediate(5*time.Second, 1*time.Minute, func() (bool, error) { + tc, err := cli.PingcapV1alpha1().TidbClusters(ns).Get(tc.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get TidbCluster %s/%s", ns, tc.Name) + val, exist := tc.Annotations["tidb.pingcap.com/force-upgrade"] + if !exist { + log.Logf("annotation tidb.pingcap.com/force-upgrade not exist in tc") + return false, nil + } + framework.ExpectEqual(val, "true", "tc annotation 
tidb.pingcap.com/force-upgrade is not \"true\", but %q", val) + return true, nil + }) + framework.ExpectNoError(err, "failed to wait for annotation tidb.pingcap.com/force-upgrade") + + ginkgo.By("Update PD Pod to correct image") + err = controller.GuaranteedUpdate(genericCli, tc, func() error { + tc.Spec.PD.BaseImage = "pingcap/pd" + return nil + }) + framework.ExpectNoError(err, "failed to change PD Pod image") + err = oa.WaitForTidbClusterReady(tc, 10*time.Minute, 10*time.Second) + framework.ExpectNoError(err, "failed to wait for TidbCluster ready: %q", tc.Name) + }) + }) + ginkgo.It("Deleted objects controlled by TidbCluster will be recovered by Operator", func() { ginkgo.By("Deploy initial tc") tc := fixture.GetTidbCluster(ns, "delete-objects", utilimage.TiDBV4) @@ -1736,10 +2095,12 @@ func validatePodSpread(pods []corev1.Pod, nodeZoneMap map[string]string, compone return nil } +// TODO: deprecate this func newTidbClusterConfig(cfg *tests.Config, ns, clusterName, password, tcVersion string) tests.TidbClusterConfig { return tests.TidbClusterConfig{ - Namespace: ns, - ClusterName: clusterName, + Namespace: ns, + ClusterName: clusterName, + EnablePVReclaim: false, OperatorTag: cfg.OperatorTag, PDImage: fmt.Sprintf("pingcap/pd:%s", tcVersion), diff --git a/tests/e2e/util/tidbcluster/tidbcluster.go b/tests/e2e/util/tidbcluster/tidbcluster.go index bc2cda1909..83ccc487eb 100644 --- a/tests/e2e/util/tidbcluster/tidbcluster.go +++ b/tests/e2e/util/tidbcluster/tidbcluster.go @@ -67,8 +67,8 @@ func IsTidbClusterAvaiable(tc *v1alpha1.TidbCluster, minReadyDuration time.Durat return false } -// WaitForTidbClusterReady waits for a TidbClusterCondition to be ready for at least minReadyDuration duration. -func WaitForTidbClusterReady(c versioned.Interface, ns, name string, timeout time.Duration, minReadyDuration time.Duration) error { +// WaitForTidbClusterConditionReady waits for a TidbClusterCondition to be ready for at least minReadyDuration duration. 
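A side note on the wait helpers added below: wait.Poll returns an error (including a timeout) that the MustWaitFor*Phase helpers appear to discard, since the err assigned inside the closure shadows the outer variable checked afterwards. A minimal sketch of a phase waiter that surfaces the wait error instead, assuming the same clientset, the non-context Get signature used elsewhere in this patch, and a package name of tidbcluster; the WaitForComponentPhase name is invented here.

// Sketch only, not part of this patch.
package tidbcluster

import (
	"time"

	"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
	"github.com/pingcap/tidb-operator/pkg/client/clientset/versioned"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
)

// WaitForComponentPhase polls the TidbCluster until getPhase reports the wanted
// phase, or the timeout expires, in which case the wait error is returned so the
// caller can fail the test explicitly.
func WaitForComponentPhase(c versioned.Interface, ns, name string, want v1alpha1.MemberPhase,
	getPhase func(*v1alpha1.TidbCluster) v1alpha1.MemberPhase,
	timeout, pollInterval time.Duration) error {
	return wait.PollImmediate(pollInterval, timeout, func() (bool, error) {
		tc, err := c.PingcapV1alpha1().TidbClusters(ns).Get(name, metav1.GetOptions{})
		if err != nil {
			return false, nil // tolerate transient API errors and keep polling
		}
		return getPhase(tc) == want, nil
	})
}

A caller could then write, for example, err := WaitForComponentPhase(cli, ns, tcName, v1alpha1.UpgradePhase, func(tc *v1alpha1.TidbCluster) v1alpha1.MemberPhase { return tc.Status.PD.Phase }, 3*time.Minute, 10*time.Second) and assert on err.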
+func WaitForTidbClusterConditionReady(c versioned.Interface, ns, name string, timeout time.Duration, minReadyDuration time.Duration) error { return WaitForTidbClusterCondition(c, ns, name, timeout, func(tc *v1alpha1.TidbCluster) (bool, error) { return IsTidbClusterAvaiable(tc, minReadyDuration, time.Now()), nil }) @@ -81,3 +81,42 @@ func MustCreateTCWithComponentsReady(cli ctrlCli.Client, oa *tests.OperatorActio err = oa.WaitForTidbClusterReady(tc, timeout, pollInterval) framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s components ready", tc.Namespace, tc.Name) } + +func MustWaitForPDPhase(c versioned.Interface, tc *v1alpha1.TidbCluster, phase v1alpha1.MemberPhase, timeout, pollInterval time.Duration) { + var err error + wait.Poll(pollInterval, timeout, func() (bool, error) { + tc, err := c.PingcapV1alpha1().TidbClusters(tc.Namespace).Get(tc.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get TidbCluster: %v", err) + if tc.Status.PD.Phase != phase { + return false, nil + } + return true, nil + }) + framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s .Status.PD.Phase to be %q", tc.Namespace, tc.Name, v1alpha1.ScalePhase) +} + +func MustWaitForTiKVPhase(c versioned.Interface, tc *v1alpha1.TidbCluster, phase v1alpha1.MemberPhase, timeout, pollInterval time.Duration) { + var err error + wait.Poll(pollInterval, timeout, func() (bool, error) { + tc, err := c.PingcapV1alpha1().TidbClusters(tc.Namespace).Get(tc.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get TidbCluster: %v", err) + if tc.Status.TiKV.Phase != phase { + return false, nil + } + return true, nil + }) + framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s .Status.TiKV.Phase to be %q", tc.Namespace, tc.Name, v1alpha1.ScalePhase) +} + +func MustWaitForTiDBPhase(c versioned.Interface, tc *v1alpha1.TidbCluster, phase v1alpha1.MemberPhase, timeout, pollInterval time.Duration) { + var err error + wait.Poll(pollInterval, timeout, func() (bool, error) { + tc, err := c.PingcapV1alpha1().TidbClusters(tc.Namespace).Get(tc.Name, metav1.GetOptions{}) + framework.ExpectNoError(err, "failed to get TidbCluster: %v", err) + if tc.Status.TiDB.Phase != phase { + return false, nil + } + return true, nil + }) + framework.ExpectNoError(err, "failed to wait for TidbCluster %s/%s .Status.TiDB.Phase to be %q", tc.Namespace, tc.Name, v1alpha1.ScalePhase) +} diff --git a/tests/pkg/fixture/fixture.go b/tests/pkg/fixture/fixture.go index a933bac889..3c8d01ad7c 100644 --- a/tests/pkg/fixture/fixture.go +++ b/tests/pkg/fixture/fixture.go @@ -127,7 +127,7 @@ func GetTidbCluster(ns, name, version string) *v1alpha1.TidbCluster { ComponentSpec: v1alpha1.ComponentSpec{ Affinity: buildAffinity(name, ns, v1alpha1.TiKVMemberType), }, - EvictLeaderTimeout: pointer.StringPtr("3m"), + EvictLeaderTimeout: pointer.StringPtr("1m"), }, TiDB: &v1alpha1.TiDBSpec{ From 6f1f0916f42c92749b6d378db3e287fced8575af Mon Sep 17 00:00:00 2001 From: Mike <842725815@qq.com> Date: Tue, 27 Apr 2021 15:17:56 +0800 Subject: [PATCH 4/4] tidbmonitor add readiness probe (#3943) --- pkg/monitor/monitor/util.go | 50 ++++++++++++++++++++++++++++++++ pkg/monitor/monitor/util_test.go | 35 ++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/pkg/monitor/monitor/util.go b/pkg/monitor/monitor/util.go index 2c7fc031f9..4e3bca8dc1 100644 --- a/pkg/monitor/monitor/util.go +++ b/pkg/monitor/monitor/util.go @@ -479,6 +479,25 @@ func getMonitorPrometheusContainer(monitor *v1alpha1.TidbMonitor, tc 
*v1alpha1.T commands = append(commands, "--storage.tsdb.max-block-duration=2h") commands = append(commands, "--storage.tsdb.min-block-duration=2h") } + + //Add readiness probe. LivenessProbe probe will affect prom wal replay,ref: https://github.com/prometheus-operator/prometheus-operator/pull/3502 + var readinessProbeHandler core.Handler + { + readyPath := "/-/ready" + readinessProbeHandler.HTTPGet = &core.HTTPGetAction{ + Path: readyPath, + Port: intstr.FromInt(9090), + } + + } + readinessProbe := &core.Probe{ + Handler: readinessProbeHandler, + TimeoutSeconds: 3, + PeriodSeconds: 5, + FailureThreshold: 120, // Allow up to 10m on startup for data recovery + } + c.ReadinessProbe = readinessProbe + c.Command = append(c.Command, strings.Join(commands, " ")) if monitor.Spec.Prometheus.ImagePullPolicy != nil { c.ImagePullPolicy = *monitor.Spec.Prometheus.ImagePullPolicy @@ -555,6 +574,37 @@ func getMonitorGrafanaContainer(secret *core.Secret, monitor *v1alpha1.TidbMonit }, }, } + + var probeHandler core.Handler + { + readyPath := "/api/health" + probeHandler.HTTPGet = &core.HTTPGetAction{ + Path: readyPath, + Port: intstr.FromInt(3000), + } + + } + //add readiness probe + readinessProbe := &core.Probe{ + Handler: probeHandler, + TimeoutSeconds: 5, + PeriodSeconds: 10, + SuccessThreshold: 1, + } + c.ReadinessProbe = readinessProbe + + //add liveness probe + livenessProbe := &core.Probe{ + Handler: probeHandler, + TimeoutSeconds: 5, + FailureThreshold: 10, + PeriodSeconds: 10, + SuccessThreshold: 1, + InitialDelaySeconds: 30, + } + + c.LivenessProbe = livenessProbe + if monitor.Spec.Grafana.ImagePullPolicy != nil { c.ImagePullPolicy = *monitor.Spec.Grafana.ImagePullPolicy } diff --git a/pkg/monitor/monitor/util_test.go b/pkg/monitor/monitor/util_test.go index 713cfdc6f5..d9662a22c8 100644 --- a/pkg/monitor/monitor/util_test.go +++ b/pkg/monitor/monitor/util_test.go @@ -957,6 +957,17 @@ func TestGetMonitorPrometheusContainer(t *testing.T) { }, }, Resources: corev1.ResourceRequirements{}, + ReadinessProbe: &corev1.Probe{ + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/-/ready", + Port: intstr.FromInt(9090), + }, + }, + TimeoutSeconds: 3, + PeriodSeconds: 5, + FailureThreshold: 120, // Allow up to 10m on startup for data recovery + }, VolumeMounts: []corev1.VolumeMount{ { Name: "prometheus-config-out", @@ -1100,6 +1111,30 @@ func TestGetMonitorGrafanaContainer(t *testing.T) { MountPath: "/grafana-dashboard-definitions/tidb", }, }, + ReadinessProbe: &corev1.Probe{ + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/api/health", + Port: intstr.FromInt(3000), + }, + }, + TimeoutSeconds: 5, + PeriodSeconds: 10, + SuccessThreshold: 1, + }, + LivenessProbe: &corev1.Probe{ + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/api/health", + Port: intstr.FromInt(3000), + }, + }, + TimeoutSeconds: 5, + FailureThreshold: 10, + PeriodSeconds: 10, + SuccessThreshold: 1, + InitialDelaySeconds: 30, + }, }, }, }
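As a quick sanity check on the probe settings above, a small sketch (not operator code) of the time budgets they imply, assuming standard kubelet probe semantics: probing starts after InitialDelaySeconds, one attempt every PeriodSeconds, and kubelet acts after FailureThreshold consecutive failures (marks the pod unready for a readiness probe, restarts the container for a liveness probe).

package main

import (
	"fmt"
	"time"
)

// budget is the rough worst-case time before kubelet reacts to a probe that
// keeps failing: the initial delay plus failureThreshold attempts, one per period.
func budget(initialDelay, period time.Duration, failureThreshold int) time.Duration {
	return initialDelay + time.Duration(failureThreshold)*period
}

func main() {
	// Prometheus readiness probe: PeriodSeconds=5, FailureThreshold=120,
	// matching the "up to 10m on startup for data recovery" comment.
	fmt.Println(budget(0, 5*time.Second, 120)) // 10m0s

	// Grafana liveness probe: InitialDelaySeconds=30, PeriodSeconds=10,
	// FailureThreshold=10, so a hung Grafana is restarted after roughly 2m10s.
	fmt.Println(budget(30*time.Second, 10*time.Second, 10)) // 2m10s
}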