Skip to content

Commit

Permalink
Add telemetry-controller
Browse files Browse the repository at this point in the history
  • Loading branch information
Tim Schrodi committed Sep 12, 2019
1 parent 16c0ae3 commit eab2db7
Show file tree
Hide file tree
Showing 24 changed files with 1,627 additions and 1 deletion.
3 changes: 3 additions & 0 deletions cmd/shoot-telemetry/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
dev
bin
TODO
19 changes: 19 additions & 0 deletions cmd/shoot-telemetry/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

# WORKDIR is the directory reported by `pwd` when make evaluates this file;
# main.go is read from it and the binaries are written to $(WORKDIR)/bin.
WORKDIR := $(shell pwd)

# build creates $(WORKDIR)/bin and compiles main.go with cgo disabled into two
# binaries: garden-shoot-telemetry-linux-amd64 and
# garden-shoot-telemetry-darwin-amd64.
# NOTE(review): only the linux build sets GO111MODULE=on and -mod=vendor; the
# darwin build relies on the default module settings - confirm this is intended.
.PHONY: build
build:
@mkdir -p $(WORKDIR)/bin
@CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build \
-mod=vendor \
-o $(WORKDIR)/bin/garden-shoot-telemetry-linux-amd64 \
$(WORKDIR)/main.go
@CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 go build \
-o $(WORKDIR)/bin/garden-shoot-telemetry-darwin-amd64 \
$(WORKDIR)/main.go

# revendor runs `go mod vendor` followed by `go mod tidy` (both with modules
# enabled) and then deletes the generated conversion file of the vendored
# machine-controller-manager v1alpha1 API.
# NOTE(review): the reason for deleting that file is not visible here -
# presumably it does not compile in the vendored tree; confirm.
.PHONY: revendor
revendor:
@GO111MODULE=on go mod vendor
@GO111MODULE=on go mod tidy
@rm -f vendor/github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1/zz_generated.conversion.go
38 changes: 38 additions & 0 deletions cmd/shoot-telemetry/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Garden Shoot Telemetry Controller

A telemetry controller to get granular insights of Shoot apiserver and etcd availability.

The measurements are persisted by appending them to a `results.csv` file in the given output directory.
The controller keeps the measurements in memory for 30 seconds before it appends them to that file.

**Disclaimer: Please keep in mind this is still on a prototype level**

### Build and Run
```sh
# Build
make build

# Run
./bin/garden-shoot-telemetry-<linux|darwin>-amd64 \
--kubeconfig <path-to-kubeconfig-for-garden-cluster> \
--output <directory-to-write-measurements-csv-file> \
--interval 5s
```

### Analyse the Data
When the controller process receives a SIGTERM signal it writes the remaining data in memory to disk.
After that the analyse functionality will be invoked, which will calculate and print statistical key figures like min/max, avg, etc. for the unhealthy periods of each cluster to stdout or into a passed file.

The analysis functionality can also be used to analyse existing measurement files.
Let's check out the example data in `example/measurements.csv`.

The analysis of the example data can be manually triggered by running the following command:
```sh
./bin/garden-shoot-telemetry-<linux|darwin>-amd64 \
analyse \
--input example/measurements.csv
```

The `example/measurements.csv` file contains data for one cluster with four unhealthy periods.

You should see the name of the cluster, the count of unhealthy periods, the shortest(min)/largest(max) unhealthy period and the average, median and standard deviation of the durations for the unhealthy periods of the clusters.
10 changes: 10 additions & 0 deletions cmd/shoot-telemetry/example/measurements.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
garden-example/example-1,aws,aws-1,2019-04-23T07:16:01Z,200
garden-example/example-1,aws,aws-1,2019-04-23T07:16:06Z,500
garden-example/example-1,aws,aws-1,2019-04-23T07:16:11Z,200
garden-example/example-1,aws,aws-1,2019-04-23T07:16:16Z,500
garden-example/example-1,aws,aws-1,2019-04-23T07:16:21Z,200
garden-example/example-1,aws,aws-1,2019-04-23T07:16:26Z,400
garden-example/example-1,aws,aws-1,2019-04-23T07:16:31Z,200
garden-example/example-1,aws,aws-1,2019-04-23T07:16:36Z,302
garden-example/example-1,aws,aws-1,2019-04-23T07:16:41Z,302
garden-example/example-1,aws,aws-1,2019-04-23T07:16:46Z,200
30 changes: 30 additions & 0 deletions cmd/shoot-telemetry/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright 2019 Copyright (c) 2019 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
log "github.com/sirupsen/logrus"

"github.com/gardener/test-infra/pkg/shoot-telemetry/cmd"
)

// main assembles the command line interface: it fetches the root command,
// registers the analyse command as a sub-command and executes the result.
// An execution error is logged as fatal, which terminates the process.
func main() {
	root := cmd.GetRootCommand()
	root.AddCommand(cmd.GetAnalyseCommand())

	err := root.Execute()
	if err != nil {
		log.Fatal(err.Error())
	}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/gardener/test-infra

go 1.12
go 1.13

require (
cloud.google.com/go v0.43.0
Expand Down
22 changes: 22 additions & 0 deletions pkg/logger/helper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2019 Copyright (c) 2019 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package logger

import "fmt"

// Logf renders format with the arguments a using fmt.Sprintf and hands the
// resulting text to logFunc as its message. No keys and values are passed on,
// which lets printf-style call sites use loggers that only offer a
// message-plus-key/value signature.
func Logf(logFunc func(msg string, keysAndValues ...interface{}), format string, a ...interface{}) {
	logFunc(fmt.Sprintf(format, a...))
}
120 changes: 120 additions & 0 deletions pkg/shoot-telemetry/analyse/figures.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright 2019 Copyright (c) 2019 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analyse

import (
"math"
"sort"
)

// figures bundles the counters, the raw samples and the derived statistics of
// one analysed target (presumably one Shoot cluster, judging by the Name,
// Provider and Seed fields - confirm against the code that fills it).
// The exported fields carry JSON tags; the unexported stores are working data
// used by calculateDownPeriodStatistics and calculateResponseTimeStatistics.
type figures struct {
// Identification of the analysed target.
Name string `json:"name"`
Provider string `json:"provider"`
Seed string `json:"seed"`
// CountUnhealthyPeriods is used by calculateDownPeriodStatistics as the number
// of entries in downPeriodsStore.
CountUnhealthyPeriods int `json:"countUnhealthyPeriods"`
// CountRequests and CountTimeouts are counters; response time statistics are
// only calculated when CountRequests exceeds CountTimeouts.
CountRequests int `json:"countRequest"`
CountTimeouts int `json:"countRequestTimeouts"`
// DownPeriods is set by calculateDownPeriodStatistics (values in seconds) and
// stays nil when no unhealthy period was counted.
DownPeriods *figuresDowntimePeriods `json:"downTimesSec"`
// ResponseTimeDuration is set by calculateResponseTimeStatistics and stays nil
// when that calculation is skipped. The JSON tag suggests milliseconds -
// NOTE(review): confirm the unit where the samples are recorded.
ResponseTimeDuration *figuresResponseTimeDuration `json:"responseTimesMs"`

// Raw samples the statistics are calculated from; both stores are sorted in
// place by the calculate* methods.
downPeriodsStore durationList
requestDurationStore responseTimeList
}

// figuresResponseTimeDuration holds the summary statistics that
// calculateResponseTimeStatistics derives from the recorded response times.
// Min and Max are taken directly from the integer samples; Avg, Median and Std
// are floating point values.
// NOTE(review): the unit is presumably milliseconds (see the "responseTimesMs"
// tag on figures.ResponseTimeDuration) - confirm where samples are recorded.
type figuresResponseTimeDuration struct {
Min int `json:"min"`
Max int `json:"max"`
Avg float64 `json:"avg"`
Median float64 `json:"median"`
Std float64 `json:"std"`
}

// figuresDowntimePeriods holds the summary statistics that
// calculateDownPeriodStatistics derives from the recorded unhealthy periods.
// All values are expressed in seconds, as they are computed from
// time.Duration.Seconds().
type figuresDowntimePeriods struct {
Min float64 `json:"min"`
Max float64 `json:"max"`
Avg float64 `json:"avg"`
Median float64 `json:"median"`
Std float64 `json:"std"`
}

// calculateDownPeriodStatistics fills f.DownPeriods with the minimum, maximum,
// average, median and standard deviation (all in seconds) of the recorded
// unhealthy periods. It does nothing when no unhealthy period was counted.
// As a side effect f.downPeriodsStore is sorted in ascending order.
func (f *figures) calculateDownPeriodStatistics() {
	count := f.CountUnhealthyPeriods
	if count < 1 {
		return
	}

	stats := &figuresDowntimePeriods{}
	f.DownPeriods = stats

	// Min, max and median are read by position, hence the store is sorted first.
	sort.Sort(f.downPeriodsStore)
	periods := f.downPeriodsStore

	total := 0.0
	for i := range periods {
		total += periods[i].Seconds()
	}
	mean := total / float64(count)

	stats.Min = periods[0].Seconds()
	stats.Max = periods[count-1].Seconds()
	stats.Avg = mean

	// The median is the middle element, or the mean of the two middle elements
	// when the number of periods is even.
	mid := count / 2
	if count%2 == 0 {
		stats.Median = (periods[mid].Seconds() + periods[mid-1].Seconds()) / 2
	} else {
		stats.Median = periods[mid].Seconds()
	}

	// Standard deviation over the whole population (divides by count).
	squaredDeviations := 0.0
	for i := range periods {
		squaredDeviations += math.Pow(periods[i].Seconds()-mean, 2)
	}
	stats.Std = math.Sqrt(squaredDeviations / float64(count))
}

// calculateResponseTimeStatistics fills f.ResponseTimeDuration with the
// minimum, maximum, average, median and standard deviation of the recorded
// response times.
//
// f.ResponseTimeDuration is left untouched when every request timed out (or
// none was counted) or when no response time was recorded at all.
// As a side effect f.requestDurationStore is sorted in ascending order.
func (f *figures) calculateResponseTimeStatistics() {
	// All statistics are based on the samples that are actually stored. Using
	// one count for indexing, parity and the divisors keeps the figures
	// consistent even if the store and the request counters are out of sync,
	// and the emptiness check prevents an out-of-range access below.
	count := len(f.requestDurationStore)
	if f.CountRequests-f.CountTimeouts < 1 || count == 0 {
		return
	}

	// Min, max and median are read by position, hence the store is sorted first.
	sort.Sort(f.requestDurationStore)
	samples := f.requestDurationStore

	sum := 0
	for _, s := range samples {
		sum += *s
	}
	// Divide in floating point; an integer division would truncate the average.
	avg := float64(sum) / float64(count)

	// The median is the middle element, or the mean of the two middle elements
	// when the number of samples is even (again computed in floating point).
	mid := count / 2
	median := float64(*samples[mid])
	if count%2 == 0 {
		median = (median + float64(*samples[mid-1])) / 2
	}

	// Standard deviation over the whole population (divides by count).
	var squaredDeviations float64
	for _, s := range samples {
		deviation := float64(*s) - avg
		squaredDeviations += deviation * deviation
	}

	f.ResponseTimeDuration = &figuresResponseTimeDuration{
		Min:    *samples[0],
		Max:    *samples[count-1],
		Avg:    avg,
		Median: median,
		Std:    math.Sqrt(squaredDeviations / float64(count)),
	}
}
45 changes: 45 additions & 0 deletions pkg/shoot-telemetry/analyse/listTypes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright 2019 Copyright (c) 2019 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analyse

import "time"

// durationList is a slice of durations that implements sort.Interface and
// orders its elements from the shortest to the longest duration.
type durationList []time.Duration

// Less reports whether the duration at index i is shorter than the one at
// index j. The durations are compared as integers: converting them to
// floating point seconds first would lose nanosecond precision on long
// durations and make distinct values compare as equal.
func (d durationList) Less(i, j int) bool {
	return d[i] < d[j]
}

// Swap exchanges the durations at the indices i and j.
func (d durationList) Swap(i, j int) {
	d[i], d[j] = d[j], d[i]
}

// Len returns the number of durations in the list.
func (d durationList) Len() int {
	return len(d)
}

// responseTimeList is a slice of pointers to integer response time samples.
// It implements sort.Interface and orders the samples ascending by the values
// the pointers refer to.
type responseTimeList []*int

// Len returns the number of samples in the list.
func (r responseTimeList) Len() int {
	return len(r)
}

// Less reports whether the sample at index i is smaller than the sample at
// index j. Both entries are dereferenced, so they must not be nil.
func (r responseTimeList) Less(i, j int) bool {
	left, right := *r[i], *r[j]
	return left < right
}

// Swap exchanges the entries at the indices i and j.
func (r responseTimeList) Swap(i, j int) {
	tmp := r[i]
	r[i] = r[j]
	r[j] = tmp
}
Loading

0 comments on commit eab2db7

Please sign in to comment.