Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Add disruption check for Thanos Querier API #29391

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/defaultmonitortests/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/openshift/origin/pkg/monitortests/kubeapiserver/legacykubeapiservermonitortests"
"github.com/openshift/origin/pkg/monitortests/machines/watchmachines"
"github.com/openshift/origin/pkg/monitortests/monitoring/disruptionmetricsapi"
"github.com/openshift/origin/pkg/monitortests/monitoring/disruptionthanosquerierapi"
"github.com/openshift/origin/pkg/monitortests/monitoring/statefulsetsrecreation"
"github.com/openshift/origin/pkg/monitortests/network/disruptioningress"
"github.com/openshift/origin/pkg/monitortests/network/disruptionpodnetwork"
Expand Down Expand Up @@ -131,6 +132,7 @@ func newDefaultMonitorTests(info monitortestframework.MonitorTestInitializationI

monitorTestRegistry.AddMonitorTestOrDie("monitoring-statefulsets-recreation", "Monitoring", statefulsetsrecreation.NewStatefulsetsChecker())
monitorTestRegistry.AddMonitorTestOrDie("metrics-api-availability", "Monitoring", disruptionmetricsapi.NewAvailabilityInvariant())
monitorTestRegistry.AddMonitorTestOrDie("thanos-querier-api-availability", "Monitoring", disruptionthanosquerierapi.NewAvailabilityInvariant())
monitorTestRegistry.AddMonitorTestOrDie(apiunreachablefromclientmetrics.MonitorName, "kube-apiserver", apiunreachablefromclientmetrics.NewMonitorTest())
monitorTestRegistry.AddMonitorTestOrDie(faultyloadbalancer.MonitorName, "kube-apiserver", faultyloadbalancer.NewMonitorTest())

Expand Down
126 changes: 126 additions & 0 deletions pkg/monitortests/monitoring/disruptionthanosquerierapi/monitortest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package disruptionthanosquerierapi

import (
"context"
"fmt"
"time"

"github.com/openshift/origin/pkg/monitortestframework"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

"github.com/openshift/origin/pkg/monitor/backenddisruption"
"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestlibrary/disruptionlibrary"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
)

// Identifiers of the Thanos Querier resources this monitor test targets.
const (
// monitoringNamespace is the namespace used for both the Deployment scale
// lookup and the route backend samplers in StartCollection.
monitoringNamespace = "openshift-monitoring"
// thanosQuerierName is used as the Deployment name for the scale lookup and
// as the name handed to the route backend samplers.
thanosQuerierName = "thanos-querier"
)

// availability is the monitor test handed out by NewAvailabilityInvariant.
// Its state is populated by StartCollection and read by the remaining methods.
type availability struct {
// disruptionChecker is assigned by StartCollection once both samplers have
// been built; it stays nil when StartCollection returns before that point.
disruptionChecker *disruptionlibrary.Availability
// notSupportedReason is assigned by StartCollection when the Deployment
// reports exactly one replica; the other methods return it when non-nil.
notSupportedReason error
}

// NewAvailabilityInvariant returns a fresh, not-yet-started availability
// monitor test. All of its state is filled in later by StartCollection.
func NewAvailabilityInvariant() monitortestframework.MonitorTest {
	return new(availability)
}

// createRouteBackendSampler builds a route backend sampler for the given
// namespace/name and path, tagged with disruptionBackendName and using the
// requested connection type.
//
// The sampler is given a user agent derived from the connection type and the
// backend name, and is told to expect HTTP 401: no auth is configured, so an
// Unauthorized response is treated as sufficient proof that the Route's backend
// is reachable. The returned error is always nil.
func createRouteBackendSampler(clusterConfig *rest.Config, namespace, name, disruptionBackendName, path string, connectionType monitorapi.BackendConnectionType) (*backenddisruption.BackendSampler, error) {
	routeBackend := backenddisruption.NewRouteBackend(clusterConfig, namespace, name, disruptionBackendName, path, connectionType)

	userAgent := fmt.Sprintf("openshift-external-backend-sampler-%s-%s", connectionType, disruptionBackendName)
	withAgent := routeBackend.WithUserAgent(userAgent)

	// Auth isn't configured, so 401 is the "healthy" answer for this sampler.
	return withAgent.WithExpectedStatusCode(401), nil
}

// StartCollection prepares and starts disruption sampling against the Thanos
// Querier route.
//
// It first looks up the scale of the thanos-querier Deployment. When the
// Deployment reports exactly one replica the test is marked as not supported:
// the reason is stored on the receiver and returned (unwrapped, so callers can
// still match on its concrete type). Otherwise it builds one sampler for new
// connections and one for reused connections, wires them into a disruption
// checker, and starts that checker.
//
// Errors from building the client or fetching the scale are wrapped with
// context; the error from the checker's own StartCollection is returned as-is.
func (w *availability) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
	kubeClient, err := kubernetes.NewForConfig(adminRESTConfig)
	if err != nil {
		return fmt.Errorf("creating kubernetes client: %w", err)
	}

	deploymentScale, err := kubeClient.AppsV1().Deployments(monitoringNamespace).GetScale(ctx, thanosQuerierName, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("getting scale of deployment %s/%s: %w", monitoringNamespace, thanosQuerierName, err)
	}
	// Skip for single replica Deployments.
	if deploymentScale.Spec.Replicas == 1 {
		w.notSupportedReason = &monitortestframework.NotSupportedError{Reason: fmt.Sprintf("%s only has a single replica", deploymentScale.Name)}
		return w.notSupportedReason
	}

	disruptionBackendName := "thanos-querier-api"
	newConnectionTestName := fmt.Sprintf("[sig-instrumentation] disruption/%s connection/new should be available throughout the test", disruptionBackendName)
	reusedConnectionTestName := fmt.Sprintf("[sig-instrumentation] disruption/%s connection/reused should be available throughout the test", disruptionBackendName)
	path := "/api"

	newConnections, err := createRouteBackendSampler(adminRESTConfig, monitoringNamespace, thanosQuerierName, disruptionBackendName, path, monitorapi.NewConnectionType)
	if err != nil {
		return fmt.Errorf("creating new-connection sampler for %s: %w", disruptionBackendName, err)
	}
	reusedConnections, err := createRouteBackendSampler(adminRESTConfig, monitoringNamespace, thanosQuerierName, disruptionBackendName, path, monitorapi.ReusedConnectionType)
	if err != nil {
		return fmt.Errorf("creating reused-connection sampler for %s: %w", disruptionBackendName, err)
	}

	w.disruptionChecker = disruptionlibrary.NewAvailabilityInvariant(
		newConnectionTestName, reusedConnectionTestName,
		newConnections, reusedConnections,
	)

	// Returned unwrapped so any error type the checker reports reaches the
	// caller unchanged.
	return w.disruptionChecker.StartCollection(ctx, adminRESTConfig, recorder)
}

// CollectData returns the intervals and junit results gathered by the
// disruption checker.
//
// If the test was marked not supported, that reason is returned as the error.
// If no checker was ever created (StartCollection returned before building
// one), there is nothing to report and all results are nil.
func (w *availability) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
	switch {
	case w.notSupportedReason != nil:
		return nil, nil, w.notSupportedReason
	case w.disruptionChecker == nil:
		// Setup failed and already reported the failure.
		return nil, nil, nil
	default:
		return w.disruptionChecker.CollectData(ctx)
	}
}

// ConstructComputedIntervals computes no additional intervals. It returns nil
// intervals together with the stored not-supported reason, which is nil when
// the test is supported.
func (w *availability) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
	var none monitorapi.Intervals
	return none, w.notSupportedReason
}

// EvaluateTestsFromConstructedIntervals delegates evaluation of the final
// intervals to the disruption checker.
//
// If the test was marked not supported, that reason is returned as the error.
// If no checker was ever created (StartCollection returned before building
// one), no test cases and no error are returned.
func (w *availability) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
	switch {
	case w.notSupportedReason != nil:
		return nil, w.notSupportedReason
	case w.disruptionChecker == nil:
		// Setup failed and already reported the failure.
		return nil, nil
	default:
		return w.disruptionChecker.EvaluateTestsFromConstructedIntervals(ctx, finalIntervals)
	}
}

// WriteContentToStorage writes nothing. It only reports the stored
// not-supported reason, which is nil when the test is supported.
func (w *availability) WriteContentToStorage(ctx context.Context, storageDir string, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
	reason := w.notSupportedReason
	return reason
}

// Cleanup performs no teardown. It only reports the stored not-supported
// reason, which is nil when the test is supported.
func (w *availability) Cleanup(ctx context.Context) error {
	reason := w.notSupportedReason
	return reason
}