Skip to content

Commit

Permalink
Merge pull request grafana#1057 from joshuasimon-taulia/tempo-autosca…
Browse files Browse the repository at this point in the history
…ling

[tempo-distributed] feat: support for autoscaling
  • Loading branch information
zanhsieh authored May 8, 2022
2 parents a2cd23c + f3bf065 commit b1b142e
Show file tree
Hide file tree
Showing 11 changed files with 190 additions and 3 deletions.
2 changes: 1 addition & 1 deletion charts/tempo-distributed/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: tempo-distributed
description: Grafana Tempo in MicroService mode
type: application
version: 0.17.1
version: 0.17.2
appVersion: 1.4.0
engine: gotpl
home: https://grafana.com/docs/tempo/latest/
Expand Down
22 changes: 21 additions & 1 deletion charts/tempo-distributed/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ The memcached default args are removed and should be provided manually. The sett
| compactor.tolerations | list | `[]` | Tolerations for compactor pods |
| config | string | `"multitenancy_enabled: false\nsearch_enabled: {{ .Values.search.enabled }}\ncompactor:\n compaction:\n block_retention: {{ .Values.compactor.config.compaction.block_retention }}\n ring:\n kvstore:\n store: memberlist\ndistributor:\n ring:\n kvstore:\n store: memberlist\n receivers:\n {{- if or (.Values.traces.jaeger.thriftCompact) (.Values.traces.jaeger.thriftBinary) (.Values.traces.jaeger.thriftHttp) (.Values.traces.jaeger.grpc) }}\n jaeger:\n protocols:\n {{- if .Values.traces.jaeger.thriftCompact }}\n thrift_compact:\n endpoint: 0.0.0.0:6831\n {{- end }}\n {{- if .Values.traces.jaeger.thriftBinary }}\n thrift_binary:\n endpoint: 0.0.0.0:6832\n {{- end }}\n {{- if .Values.traces.jaeger.thriftHttp }}\n thrift_http:\n endpoint: 0.0.0.0:14268\n {{- end }}\n {{- if .Values.traces.jaeger.grpc }}\n grpc:\n endpoint: 0.0.0.0:14250\n {{- end }}\n {{- end }}\n {{- if .Values.traces.zipkin}}\n zipkin:\n endpoint: 0.0.0.0:9411\n {{- end }}\n {{- if or (.Values.traces.otlp.http) (.Values.traces.otlp.grpc) }}\n otlp:\n protocols:\n {{- if .Values.traces.otlp.http }}\n http:\n endpoint: 0.0.0.0:55681\n {{- end }}\n {{- if .Values.traces.otlp.grpc }}\n grpc:\n endpoint: 0.0.0.0:4317\n {{- end }}\n {{- end }}\n {{- if .Values.traces.opencensus }}\n opencensus:\n endpoint: 0.0.0.0:55678\n {{- end }}\n {{- if .Values.traces.kafka }}\n kafka:\n {{- toYaml .Values.traces.kafka | nindent 6 }}\n {{- end }}\nquerier:\n frontend_worker:\n frontend_address: {{ include \"tempo.queryFrontendFullname\" . 
}}-discovery:9095\n {{- if .Values.querier.config.frontend_worker.grpc_client_config }}\n grpc_client_config:\n {{- toYaml .Values.querier.config.frontend_worker.grpc_client_config | nindent 6 }}\n {{- end }}\ningester:\n lifecycler:\n ring:\n replication_factor: 1\n kvstore:\n store: memberlist\n tokens_file_path: /var/tempo/tokens.json\n {{- if .Values.ingester.config.maxBlockBytes }}\n max_block_bytes: {{ .Values.ingester.config.maxBlockBytes }}\n {{- end }}\n {{- if .Values.ingester.config.maxBlockDuration }}\n max_block_duration: {{ .Values.ingester.config.maxBlockDuration }}\n {{- end }}\n {{- if .Values.ingester.config.completeBlockTimeout }}\n complete_block_timeout: {{ .Values.ingester.config.completeBlockTimeout }}\n {{- end }}\nmemberlist:\n abort_if_cluster_join_fails: false\n join_members:\n - {{ include \"tempo.fullname\" . }}-gossip-ring\noverrides:\n {{- toYaml .Values.global_overrides | nindent 2 }}\nserver:\n http_listen_port: {{ .Values.server.httpListenPort }}\n log_level: {{ .Values.server.logLevel }}\n log_format: {{ .Values.server.logFormat }}\n grpc_server_max_recv_msg_size: {{ .Values.server.grpc_server_max_recv_msg_size }}\n grpc_server_max_send_msg_size: {{ .Values.server.grpc_server_max_send_msg_size }}\nstorage:\n trace:\n backend: {{.Values.storage.trace.backend}}\n {{- if eq .Values.storage.trace.backend \"gcs\"}}\n gcs:\n {{- toYaml .Values.storage.trace.gcs | nindent 6}}\n {{- end}}\n {{- if eq .Values.storage.trace.backend \"s3\"}}\n s3:\n {{- toYaml .Values.storage.trace.s3 | nindent 6}}\n {{- end}}\n {{- if eq .Values.storage.trace.backend \"azure\"}}\n azure:\n {{- toYaml .Values.storage.trace.azure | nindent 6}}\n {{- end}}\n blocklist_poll: 5m\n local:\n path: /var/tempo/traces\n wal:\n path: /var/tempo/wal\n cache: memcached\n memcached:\n consistent_hash: true\n host: {{ include \"tempo.fullname\" . }}-memcached\n service: memcached-client\n timeout: 500ms\n"` | |
| distributor.affinity | string | Hard node and soft zone anti-affinity | Affinity for distributor pods. Passed through `tpl` and, thus, to be configured as string |
| distributor.autoscaling.enabled | bool | `false` | Enable autoscaling for the distributor |
| distributor.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the distributor |
| distributor.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the distributor |
| distributor.autoscaling.targetCPUUtilizationPercentage | int | `60` | Target CPU utilisation percentage for the distributor |
| distributor.autoscaling.targetMemoryUtilizationPercentage | string | `nil` | Target memory utilisation percentage for the distributor |
| distributor.extraArgs | list | `[]` | Additional CLI args for the distributor |
| distributor.extraEnv | list | `[]` | Environment variables to add to the distributor pods |
| distributor.extraEnvFrom | list | `[]` | Environment variables from secrets or configmaps to add to the distributor pods |
Expand All @@ -107,6 +112,11 @@ The memcached default args are removed and should be provided manually. The sett
| distributor.terminationGracePeriodSeconds | int | `30` | Grace period to allow the distributor to shutdown before it is killed |
| distributor.tolerations | list | `[]` | Tolerations for distributor pods |
| gateway.affinity | string | Hard node and soft zone anti-affinity | Affinity for gateway pods. Passed through `tpl` and, thus, to be configured as string |
| gateway.autoscaling.enabled | bool | `false` | Enable autoscaling for the gateway |
| gateway.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the gateway |
| gateway.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the gateway |
| gateway.autoscaling.targetCPUUtilizationPercentage | int | `60` | Target CPU utilisation percentage for the gateway |
| gateway.autoscaling.targetMemoryUtilizationPercentage | string | `nil` | Target memory utilisation percentage for the gateway |
| gateway.basicAuth.enabled | bool | `false` | Enables basic authentication for the gateway |
| gateway.basicAuth.existingSecret | string | `nil` | Existing basic auth secret to use. Must contain '.htpasswd' |
| gateway.basicAuth.htpasswd | string | `"{{ htpasswd (required \"'gateway.basicAuth.username' is required\" .Values.gateway.basicAuth.username) (required \"'gateway.basicAuth.password' is required\" .Values.gateway.basicAuth.password) }}"` | Uses the specified username and password to compute a htpasswd using Sprig's `htpasswd` function. The value is templated using `tpl`. Override this to use a custom htpasswd, e.g. in case the default causes high CPU load. |
Expand Down Expand Up @@ -157,7 +167,12 @@ The memcached default args are removed and should be provided manually. The sett
| global.priorityClassName | string | `nil` | Overrides the priorityClassName for all pods |
| global_overrides.per_tenant_override_config | string | `"/conf/overrides.yaml"` | |
| ingester.affinity | string | Hard node and soft zone anti-affinity | Affinity for ingester pods. Passed through `tpl` and, thus, to be configured as string |
| ingester.annotations | object | `{}` | Annotations for the ingester StatefulSet |
| ingester.annotations | object | `{}` | Annotations for ingester StatefulSet |
| ingester.autoscaling.enabled | bool | `false` | Enable autoscaling for the ingester |
| ingester.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the ingester |
| ingester.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the ingester |
| ingester.autoscaling.targetCPUUtilizationPercentage | int | `60` | Target CPU utilisation percentage for the ingester |
| ingester.autoscaling.targetMemoryUtilizationPercentage | string | `nil` | Target memory utilisation percentage for the ingester |
| ingester.config.complete_block_timeout | string | `nil` | Duration to keep blocks in the ingester after they have been flushed |
| ingester.config.max_block_bytes | string | `nil` | Maximum size of a block before cutting it |
| ingester.config.max_block_duration | string | `nil` | Maximum length of time before cutting a block |
Expand Down Expand Up @@ -224,6 +239,11 @@ The memcached default args are removed and should be provided manually. The sett
| querier.terminationGracePeriodSeconds | int | `30` | Grace period to allow the querier to shutdown before it is killed |
| querier.tolerations | list | `[]` | Tolerations for querier pods |
| queryFrontend.affinity | string | Hard node and soft zone anti-affinity | Affinity for query-frontend pods. Passed through `tpl` and, thus, to be configured as string |
| queryFrontend.autoscaling.enabled | bool | `false` | Enable autoscaling for the query-frontend |
| queryFrontend.autoscaling.maxReplicas | int | `3` | Maximum autoscaling replicas for the query-frontend |
| queryFrontend.autoscaling.minReplicas | int | `1` | Minimum autoscaling replicas for the query-frontend |
| queryFrontend.autoscaling.targetCPUUtilizationPercentage | int | `60` | Target CPU utilisation percentage for the query-frontend |
| queryFrontend.autoscaling.targetMemoryUtilizationPercentage | string | `nil` | Target memory utilisation percentage for the query-frontend |
| queryFrontend.extraArgs | list | `[]` | Additional CLI args for the query-frontend |
| queryFrontend.extraEnv | list | `[]` | Environment variables to add to the query-frontend pods |
| queryFrontend.extraEnvFrom | list | `[]` | Environment variables from secrets or configmaps to add to the query-frontend pods |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ metadata:
{{- end }}
spec:
minReadySeconds: 10
{{- if not .Values.distributor.autoscaling.enabled }}
replicas: {{ .Values.distributor.replicas }}
{{- end }}
revisionHistoryLimit: 10
selector:
matchLabels:
Expand Down
28 changes: 28 additions & 0 deletions charts/tempo-distributed/templates/distributor/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{- if .Values.distributor.autoscaling.enabled }}
{{- /*
HorizontalPodAutoscaler for the distributor Deployment.
Rendered only when `distributor.autoscaling.enabled` is true.
autoscaling/v2beta1 is no longer served from Kubernetes 1.25 onwards, so the
stable autoscaling/v2 API (and its `target` metric schema) is used whenever the
cluster advertises it; older clusters keep the original v2beta1 manifest.
*/ -}}
{{- $useV2 := .Capabilities.APIVersions.Has "autoscaling/v2" }}
apiVersion: {{ ternary "autoscaling/v2" "autoscaling/v2beta1" $useV2 }}
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "tempo.distributorFullname" . }}
  labels:
    {{- include "tempo.distributorLabels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "tempo.distributorFullname" . }}
  minReplicas: {{ .Values.distributor.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.distributor.autoscaling.maxReplicas }}
  metrics:
  {{- /* `with` skips a metric entirely when its target value is unset. */ -}}
  {{- with .Values.distributor.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        {{- if $useV2 }}
        target:
          type: Utilization
          averageUtilization: {{ . }}
        {{- else }}
        targetAverageUtilization: {{ . }}
        {{- end }}
  {{- end }}
  {{- with .Values.distributor.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        {{- if $useV2 }}
        target:
          type: Utilization
          averageUtilization: {{ . }}
        {{- else }}
        targetAverageUtilization: {{ . }}
        {{- end }}
  {{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ metadata:
{{- include "tempo.gatewayLabels" . | nindent 4 }}
spec:
minReadySeconds: 10
{{- if not .Values.gateway.autoscaling.enabled }}
replicas: {{ .Values.gateway.replicas }}
{{- end }}
revisionHistoryLimit: 10
selector:
matchLabels:
Expand Down
28 changes: 28 additions & 0 deletions charts/tempo-distributed/templates/gateway/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{- if .Values.gateway.autoscaling.enabled }}
{{- /*
HorizontalPodAutoscaler for the gateway Deployment.
Rendered only when `gateway.autoscaling.enabled` is true.
autoscaling/v2beta1 is no longer served from Kubernetes 1.25 onwards, so the
stable autoscaling/v2 API (and its `target` metric schema) is used whenever the
cluster advertises it; older clusters keep the original v2beta1 manifest.
*/ -}}
{{- $useV2 := .Capabilities.APIVersions.Has "autoscaling/v2" }}
apiVersion: {{ ternary "autoscaling/v2" "autoscaling/v2beta1" $useV2 }}
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "tempo.gatewayFullname" . }}
  labels:
    {{- include "tempo.gatewayLabels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "tempo.gatewayFullname" . }}
  minReplicas: {{ .Values.gateway.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.gateway.autoscaling.maxReplicas }}
  metrics:
  {{- /* `with` skips a metric entirely when its target value is unset. */ -}}
  {{- with .Values.gateway.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        {{- if $useV2 }}
        target:
          type: Utilization
          averageUtilization: {{ . }}
        {{- else }}
        targetAverageUtilization: {{ . }}
        {{- end }}
  {{- end }}
  {{- with .Values.gateway.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        {{- if $useV2 }}
        target:
          type: Utilization
          averageUtilization: {{ . }}
        {{- else }}
        targetAverageUtilization: {{ . }}
        {{- end }}
  {{- end }}
{{- end }}
28 changes: 28 additions & 0 deletions charts/tempo-distributed/templates/ingester/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{- if .Values.ingester.autoscaling.enabled }}
{{- /*
HorizontalPodAutoscaler for the ingester StatefulSet.
Rendered only when `ingester.autoscaling.enabled` is true.
autoscaling/v2beta1 is no longer served from Kubernetes 1.25 onwards, so the
stable autoscaling/v2 API (and its `target` metric schema) is used whenever the
cluster advertises it; older clusters keep the original v2beta1 manifest.
*/ -}}
{{- $useV2 := .Capabilities.APIVersions.Has "autoscaling/v2" }}
apiVersion: {{ ternary "autoscaling/v2" "autoscaling/v2beta1" $useV2 }}
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "tempo.ingesterFullname" . }}
  labels:
    {{- include "tempo.ingesterLabels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: StatefulSet
    name: {{ include "tempo.ingesterFullname" . }}
  minReplicas: {{ .Values.ingester.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.ingester.autoscaling.maxReplicas }}
  metrics:
  {{- /* `with` skips a metric entirely when its target value is unset. */ -}}
  {{- with .Values.ingester.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        {{- if $useV2 }}
        target:
          type: Utilization
          averageUtilization: {{ . }}
        {{- else }}
        targetAverageUtilization: {{ . }}
        {{- end }}
  {{- end }}
  {{- with .Values.ingester.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        {{- if $useV2 }}
        target:
          type: Utilization
          averageUtilization: {{ . }}
        {{- else }}
        targetAverageUtilization: {{ . }}
        {{- end }}
  {{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@ metadata:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if not .Values.ingester.autoscaling.enabled }}
replicas: {{ .Values.ingester.replicas }}
{{- end }}
selector:
matchLabels:
{{- include "tempo.ingesterSelectorLabels" . | nindent 6}}
serviceName: ingester
podManagementPolicy: Parallel
updateStrategy:
type: RollingUpdate
rollingUpdate:
partition: 0
template:
metadata:
labels:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ metadata:
{{- end }}
spec:
minReadySeconds: 10
{{- if not .Values.queryFrontend.autoscaling.enabled }}
replicas: {{ .Values.queryFrontend.replicas }}
{{- end }}
revisionHistoryLimit: 10
selector:
matchLabels:
Expand Down
28 changes: 28 additions & 0 deletions charts/tempo-distributed/templates/query-frontend/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{- if .Values.queryFrontend.autoscaling.enabled }}
{{- /*
HorizontalPodAutoscaler for the query-frontend Deployment.
Rendered only when `queryFrontend.autoscaling.enabled` is true.
autoscaling/v2beta1 is no longer served from Kubernetes 1.25 onwards, so the
stable autoscaling/v2 API (and its `target` metric schema) is used whenever the
cluster advertises it; older clusters keep the original v2beta1 manifest.
*/ -}}
{{- $useV2 := .Capabilities.APIVersions.Has "autoscaling/v2" }}
apiVersion: {{ ternary "autoscaling/v2" "autoscaling/v2beta1" $useV2 }}
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "tempo.queryFrontendFullname" . }}
  labels:
    {{- include "tempo.queryFrontendLabels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "tempo.queryFrontendFullname" . }}
  minReplicas: {{ .Values.queryFrontend.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.queryFrontend.autoscaling.maxReplicas }}
  metrics:
  {{- /* `with` skips a metric entirely when its target value is unset. */ -}}
  {{- with .Values.queryFrontend.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        {{- if $useV2 }}
        target:
          type: Utilization
          averageUtilization: {{ . }}
        {{- else }}
        targetAverageUtilization: {{ . }}
        {{- end }}
  {{- end }}
  {{- with .Values.queryFrontend.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        {{- if $useV2 }}
        target:
          type: Utilization
          averageUtilization: {{ . }}
        {{- else }}
        targetAverageUtilization: {{ . }}
        {{- end }}
  {{- end }}
{{- end }}
45 changes: 45 additions & 0 deletions charts/tempo-distributed/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ ingester:
annotations: {}
# -- Number of replicas for the ingester
replicas: 1
# Horizontal Pod Autoscaler settings for the ingester.
# While enabled, the ingester StatefulSet template omits `replicas`.
autoscaling:
  # -- Enable autoscaling for the ingester
  enabled: false
  # -- Minimum autoscaling replicas for the ingester
  minReplicas: 1
  # -- Maximum autoscaling replicas for the ingester
  maxReplicas: 3
  # -- Target CPU utilisation percentage for the ingester
  targetCPUUtilizationPercentage: 60
  # -- Target memory utilisation percentage for the ingester
  targetMemoryUtilizationPercentage: null
image:
# -- The Docker registry for the ingester image. Overrides `tempo.image.registry`
registry: null
Expand Down Expand Up @@ -131,9 +142,21 @@ ingester:
# -- Duration to keep blocks in the ingester after they have been flushed
complete_block_timeout: null

# Configuration for the distributor
distributor:
# -- Number of replicas for the distributor
replicas: 1
# Horizontal Pod Autoscaler settings for the distributor.
# While enabled, the distributor Deployment template omits `replicas`.
autoscaling:
  # -- Enable autoscaling for the distributor
  enabled: false
  # -- Minimum autoscaling replicas for the distributor
  minReplicas: 1
  # -- Maximum autoscaling replicas for the distributor
  maxReplicas: 3
  # -- Target CPU utilisation percentage for the distributor
  targetCPUUtilizationPercentage: 60
  # -- Target memory utilisation percentage for the distributor
  targetMemoryUtilizationPercentage: null
image:
# -- The Docker registry for the distributor image. Overrides `tempo.image.registry`
registry: null
Expand Down Expand Up @@ -315,6 +338,17 @@ queryFrontend:
backend: 127.0.0.1:3100
# -- Number of replicas for the query-frontend
replicas: 1
# Horizontal Pod Autoscaler settings for the query-frontend.
# While enabled, the query-frontend Deployment template omits `replicas`.
autoscaling:
  # -- Enable autoscaling for the query-frontend
  enabled: false
  # -- Minimum autoscaling replicas for the query-frontend
  minReplicas: 1
  # -- Maximum autoscaling replicas for the query-frontend
  maxReplicas: 3
  # -- Target CPU utilisation percentage for the query-frontend
  targetCPUUtilizationPercentage: 60
  # -- Target memory utilisation percentage for the query-frontend
  targetMemoryUtilizationPercentage: null
image:
# -- The Docker registry for the query-frontend image. Overrides `tempo.image.registry`
registry: null
Expand Down Expand Up @@ -663,6 +697,17 @@ gateway:
enabled: false
# -- Number of replicas for the gateway
replicas: 1
# Horizontal Pod Autoscaler settings for the gateway.
# While enabled, the gateway Deployment template omits `replicas`.
autoscaling:
  # -- Enable autoscaling for the gateway
  enabled: false
  # -- Minimum autoscaling replicas for the gateway
  minReplicas: 1
  # -- Maximum autoscaling replicas for the gateway
  maxReplicas: 3
  # -- Target CPU utilisation percentage for the gateway
  targetCPUUtilizationPercentage: 60
  # -- Target memory utilisation percentage for the gateway
  targetMemoryUtilizationPercentage: null
# -- Enable logging of 2xx and 3xx HTTP requests
verboseLogging: true
image:
Expand Down

0 comments on commit b1b142e

Please sign in to comment.