Skip to content

Commit

Permalink
feat(monitor): support to monitor vm and expose influxdb svc as nodep…
Browse files Browse the repository at this point in the history
…ort type (#2116)

* feat(monitor): support monitor kubevirt vms

support to collect vm metrics from libvirt level

Signed-off-by: willzgli <willzgli@tencent.com>

* fix(monitor): support configure vm alert rule

1. fix alert rule expression for vm
2. add vm name in alert message
3. round values in prometheus alert annotation to two decimals

Signed-off-by: willzgli <willzgli@tencent.com>

* fix(monitor): support editing vm alarm policies

* feat(monitor): set retention days through configuration file

* fix(monitor): add metric vm_memory_usage

* fix(monitor): support to visit influxdb with vip

* fix(monitor): expose influxdb svc as nodeport

expose the influxdb svc as a nodeport in both the non-ha mode global
cluster and the ha mode global cluster. this unifies the remote write
address for prometheus in the global cluster and business clusters.

Signed-off-by: willzgli <willzgli@tencent.com>

Signed-off-by: willzgli <willzgli@tencent.com>
Co-authored-by: willzgli <willzgli@tencent.com>
  • Loading branch information
willzgli and willzgli authored Nov 14, 2022
1 parent 0602f2b commit e1d681d
Show file tree
Hide file tree
Showing 20 changed files with 234 additions and 31 deletions.
19 changes: 14 additions & 5 deletions cmd/tke-installer/app/installer/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1801,7 +1801,7 @@ func (t *TKE) getTKEPlatformControllerOptions(ctx context.Context) map[string]in
if t.Para.Config.Monitor.InfluxDBMonitor != nil {
options["monitorStorageType"] = "influxdb"
if t.Para.Config.Monitor.InfluxDBMonitor.LocalInfluxDBMonitor != nil {
options["monitorStorageAddresses"] = fmt.Sprintf("http://%s:8086", t.servers[0])
options["monitorStorageAddresses"] = t.getLocalInfluxdbAddress()
} else if t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor != nil {
address := t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor.URL
if t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor.Username != "" {
Expand Down Expand Up @@ -1945,7 +1945,6 @@ func (t *TKE) getInfluxDBOptions(ctx context.Context) (map[string]interface{}, e
}
options["nodeName"] = node.Name
}

return options, nil
}

Expand Down Expand Up @@ -2022,7 +2021,7 @@ func (t *TKE) installTKEMonitorAPI(ctx context.Context) error {
options["StoragePassword"] = string(t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor.Password)
} else if t.Para.Config.Monitor.InfluxDBMonitor.LocalInfluxDBMonitor != nil {
// todo
options["StorageAddress"] = fmt.Sprintf("http://%s:8086", t.servers[0])
options["StorageAddress"] = t.getLocalInfluxdbAddress()
}
} else if t.Para.Config.Monitor.ThanosMonitor != nil {
options["StorageType"] = "thanos"
Expand Down Expand Up @@ -2085,15 +2084,16 @@ func (t *TKE) installTKEMonitorController(ctx context.Context) error {
}
params["MonitorStorageAddresses"] = address
} else if t.Para.Config.Monitor.InfluxDBMonitor.LocalInfluxDBMonitor != nil {
params["StorageAddress"] = fmt.Sprintf("http://%s:8086", t.servers[0])
params["MonitorStorageAddresses"] = fmt.Sprintf("http://%s:8086", t.servers[0])
params["StorageAddress"] = t.getLocalInfluxdbAddress()
params["MonitorStorageAddresses"] = t.getLocalInfluxdbAddress()
}
} else if t.Para.Config.Monitor.ThanosMonitor != nil {
params["StorageType"] = "thanos"
params["MonitorStorageType"] = "thanos"
// thanos-query address
params["MonitorStorageAddresses"] = "http://thanos-query.tke.svc.cluster.local:9090"
}
params["RetentionDays"] = t.Para.Config.Monitor.RetentionDays //can accept a nil value
}

if err := apiclient.CreateResourceWithDir(ctx, t.globalClient, "manifests/tke-monitor-controller/*.yaml", params); err != nil {
Expand Down Expand Up @@ -2872,3 +2872,12 @@ func (t *TKE) patchClusterInfo(ctx context.Context, patchData interface{}) error
_, err = t.globalClient.CoreV1().ConfigMaps("kube-public").Patch(ctx, "cluster-info", k8stypes.MergePatchType, patchByte, metav1.PatchOptions{})
return err
}

// getLocalInfluxdbAddress returns the HTTP URL of the locally deployed
// InfluxDB as reached through node port 30086. The HA VIP is used as the host
// when one is configured; otherwise the first entry of t.servers is used.
func (t *TKE) getLocalInfluxdbAddress() string {
	// The influxdb svc must be set as NodePort type, and the nodePort is 30086.
	const urlFormat = "http://%s:30086"

	// The server-based address is built first, so t.servers is indexed even
	// when a VIP ends up overriding it.
	address := fmt.Sprintf(urlFormat, t.servers[0])
	if ha := t.Para.Config.HA; ha != nil {
		if vip := ha.VIP(); len(vip) > 0 {
			address = fmt.Sprintf(urlFormat, vip)
		}
	}
	return address
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ spec:
{{- if .Values.baremetalStorage }}
nodeName: {{ .Values.nodeName }}
{{- end }}
hostNetwork: true
volumes:
- name: data
{{- if .Values.baremetalStorage }}
Expand Down Expand Up @@ -83,4 +82,21 @@ spec:
requests:
storage: 40Gi
storageClassName: {{ .Values.nfsStorageClassName }}
{{- end }}
{{- end }}

---
apiVersion: v1
kind: Service
metadata:
name: influxdb
namespace: tke
spec:
ports:
- name: remote-write
port: 8086
targetPort: 8086
protocol: TCP
nodePort: 30086
type: NodePort
selector:
app: influxdb
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ data:
storage:
{{- if eq .StorageType "influxDB" }}
influxDB:
{{- if .RetentionDays }}
retentionDays: {{ .RetentionDays }}
{{- end }}
servers:
- address: {{ .StorageAddress }}
username: {{ .StorageUsername }}
Expand Down
1 change: 1 addition & 0 deletions cmd/tke-installer/app/installer/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ type Monitor struct {
ThanosMonitor *ThanosMonitor `json:"thanos,omitempty"`
ESMonitor *ESMonitor `json:"es,omitempty"`
InfluxDBMonitor *InfluxDBMonitor `json:"influxDB,omitempty"`
RetentionDays *int `json:"retentionDays,omitempty"`
}

type ThanosMonitor struct {
Expand Down
9 changes: 8 additions & 1 deletion cmd/tke-monitor-controller/app/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ package config

import (
"fmt"
"net"

"k8s.io/apiserver/pkg/authentication/request/anonymous"
"k8s.io/apiserver/pkg/authorization/authorizerfactory"
apiserver "k8s.io/apiserver/pkg/server"
restclient "k8s.io/client-go/rest"
"net"
versionedclientset "tkestack.io/tke/api/client/clientset/versioned"
monitorapiconfig "tkestack.io/tke/cmd/tke-monitor-api/app/config"
monitorapioptions "tkestack.io/tke/cmd/tke-monitor-api/app/options"
Expand Down Expand Up @@ -85,11 +86,17 @@ func CreateConfigFromOptions(serverName string, opts *options.Options) (*Config,

// load config file, if provided
if configFile := opts.MonitorConfig; len(configFile) > 0 {
var defaultRetentionDays int = 15 //set default retention value as 15 days for influxdb
monitorConfig, err = monitorapiconfig.LoadConfigFile(configFile)
if err != nil {
log.Error("Failed to load monitor configuration file", log.String("configFile", configFile), log.Err(err))
return nil, err
}

if monitorConfig.Storage.InfluxDB != nil && monitorConfig.Storage.InfluxDB.RetentionDays == nil {
log.Info("don't set retention times in config, use the default one")
monitorConfig.Storage.InfluxDB.RetentionDays = &defaultRetentionDays
}
}

// We always validate the local configuration (command line + config file).
Expand Down
6 changes: 4 additions & 2 deletions cmd/tke-monitor-controller/app/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@ package app

import (
"fmt"
"net/http"
"time"

"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/wait"
cacheddiscovery "k8s.io/client-go/discovery/cached"
"k8s.io/client-go/rest"
"k8s.io/client-go/restmapper"
"net/http"
"time"
versionedclientset "tkestack.io/tke/api/client/clientset/versioned"
businessv1 "tkestack.io/tke/api/client/clientset/versioned/typed/business/v1"
platformv1 "tkestack.io/tke/api/client/clientset/versioned/typed/platform/v1"
Expand Down Expand Up @@ -77,6 +78,7 @@ type ControllerContext struct {
// Remote write/read address for prometheus
RemoteAddresses []string
RemoteType string
RententionDays int
}

// IsControllerEnabled returns whether the controller has been enabled
Expand Down
12 changes: 9 additions & 3 deletions cmd/tke-monitor-controller/app/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
package app

import (
"k8s.io/apimachinery/pkg/runtime/schema"
"net/http"
"time"
"tkestack.io/tke/api/monitor/v1"

"k8s.io/apimachinery/pkg/runtime/schema"
v1 "tkestack.io/tke/api/monitor/v1"
"tkestack.io/tke/pkg/monitor/controller/metric"
"tkestack.io/tke/pkg/monitor/controller/prometheus"
"tkestack.io/tke/pkg/monitor/storage"
Expand Down Expand Up @@ -65,14 +66,19 @@ func startPrometheusController(ctx ControllerContext) (http.Handler, bool, error
return nil, false, nil
}

var retentionDays int = -1 //retentionDays is used in controller only when storage is influx db
if ctx.MonitorConfig.Storage.InfluxDB != nil {
retentionDays = *ctx.MonitorConfig.Storage.InfluxDB.RetentionDays
}

ctrl := prometheus.NewController(
ctx.ClientBuilder.ClientOrDie("prometheus-controller"),
ctx.PlatformClient,
ctx.InformerFactory.Monitor().V1().Prometheuses(),
promEventSyncPeriod,

ctx.RemoteAddresses,
ctx.RemoteType,
retentionDays,
)

go func() {
Expand Down
15 changes: 12 additions & 3 deletions cmd/tke-upgrade/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func (t *TKE) TKEPlatformController() (option Options) {
if t.Para.Config.Monitor.InfluxDBMonitor != nil {
option["MonitorStorageType"] = "influxdb"
if t.Para.Config.Monitor.InfluxDBMonitor.LocalInfluxDBMonitor != nil {
option["MonitorStorageAddresses"] = fmt.Sprintf("http://%s:8086", t.Servers[0])
option["MonitorStorageAddresses"] = t.getLocalInfluxdbAddress()
} else if t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor != nil {
address := t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor.URL
if t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor.Username != "" {
Expand Down Expand Up @@ -247,7 +247,7 @@ func (t *TKE) TKEMonitorAPI() (option Options) {
option["StoragePassword"] = string(t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor.Password)
} else if t.Para.Config.Monitor.InfluxDBMonitor.LocalInfluxDBMonitor != nil {
// todo
option["StorageAddress"] = fmt.Sprintf("http://%s:8086", t.Servers[0])
option["StorageAddress"] = t.getLocalInfluxdbAddress()
}
}
}
Expand Down Expand Up @@ -275,7 +275,7 @@ func (t *TKE) TKEMonitorController() (option Options) {
option["StorageUsername"] = t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor.Username
option["StoragePassword"] = string(t.Para.Config.Monitor.InfluxDBMonitor.ExternalInfluxDBMonitor.Password)
} else if t.Para.Config.Monitor.InfluxDBMonitor.LocalInfluxDBMonitor != nil {
option["StorageAddress"] = fmt.Sprintf("http://%s:8086", t.Servers[0])
option["StorageAddress"] = t.getLocalInfluxdbAddress()
}
}
}
Expand Down Expand Up @@ -321,3 +321,12 @@ func (t *TKE) TKERegistryAPI() (option Options) {
}
return
}

// getLocalInfluxdbAddress returns the HTTP URL of the locally deployed
// InfluxDB as reached through node port 30086. The HA VIP is used as the host
// when one is configured; otherwise the first entry of t.Servers is used.
func (t *TKE) getLocalInfluxdbAddress() string {
	// The influxdb svc must be set as NodePort type, and the nodePort is 30086.
	const urlFormat = "http://%s:30086"

	// The server-based address is built first, so t.Servers is indexed even
	// when a VIP ends up overriding it.
	address := fmt.Sprintf(urlFormat, t.Servers[0])
	if ha := t.Para.Config.HA; ha != nil {
		if vip := ha.VIP(); len(vip) > 0 {
			address = fmt.Sprintf(urlFormat, vip)
		}
	}
	return address
}
1 change: 1 addition & 0 deletions pkg/monitor/apis/config/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,5 +143,6 @@ var (
"Storage.InfluxDB.Servers[*].TimeoutSeconds",
"Storage.InfluxDB.Servers[*].Username",
"Storage.Thanos.Servers[*].Address",
"Storage.InfluxDB.RetentionDays",
)
)
3 changes: 2 additions & 1 deletion pkg/monitor/apis/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ type ThanosStorageServer struct {
}

type InfluxDBStorage struct {
Servers []InfluxDBStorageServer
RetentionDays *int
Servers []InfluxDBStorageServer
}

type InfluxDBStorageServer struct {
Expand Down
4 changes: 3 additions & 1 deletion pkg/monitor/apis/config/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ type ThanosStorageServer struct {
}

type InfluxDBStorage struct {
Servers []InfluxDBStorageServer `json:"servers"`
// +optional
RetentionDays *int `json:"retentionDays,omitempty"`
Servers []InfluxDBStorageServer `json:"servers"`
}

type InfluxDBStorageServer struct {
Expand Down
2 changes: 2 additions & 0 deletions pkg/monitor/apis/config/v1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/monitor/apis/config/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/monitor/apis/config/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pkg/monitor/config/configfiles/configfiles.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ package configfiles

import (
"fmt"
"k8s.io/apimachinery/pkg/runtime/serializer"
"path/filepath"

"k8s.io/apimachinery/pkg/runtime/serializer"
monitorconfig "tkestack.io/tke/pkg/monitor/apis/config"
monitorscheme "tkestack.io/tke/pkg/monitor/apis/config/scheme"
"tkestack.io/tke/pkg/monitor/config/codec"
Expand Down
Loading

0 comments on commit e1d681d

Please sign in to comment.