Skip to content

Commit

Permalink
Support dashboards for SSD mode (grafana#6540)
Browse files Browse the repository at this point in the history
* Support dashboards for SSD mode

* Fix issues

* Add compiled mixins

* Flip $.config.ssd on if conditions

* Use prefix matcher for ssd

* Add compiled SSD mixin

* Update loki-mixin-check to check ssd compiled mixin

* Checkout compiled ssd mixin
  • Loading branch information
salvacorts authored Jul 5, 2022
1 parent b930104 commit 7314d95
Show file tree
Hide file tree
Showing 32 changed files with 14,555 additions and 1,663 deletions.
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ clients/cmd/promtail/promtail-debug:

MIXIN_PATH := production/loki-mixin
MIXIN_OUT_PATH := production/loki-mixin-compiled
MIXIN_OUT_PATH_SSD := production/loki-mixin-compiled-ssd

loki-mixin:
ifeq ($(BUILD_IN_CONTAINER),true)
Expand All @@ -226,11 +227,16 @@ else
@rm -rf $(MIXIN_OUT_PATH) && mkdir $(MIXIN_OUT_PATH)
@cd $(MIXIN_PATH) && jb install
@mixtool generate all --output-alerts $(MIXIN_OUT_PATH)/alerts.yaml --output-rules $(MIXIN_OUT_PATH)/rules.yaml --directory $(MIXIN_OUT_PATH)/dashboards ${MIXIN_PATH}/mixin.libsonnet

@rm -rf $(MIXIN_OUT_PATH_SSD) && mkdir $(MIXIN_OUT_PATH_SSD)
@cd $(MIXIN_PATH) && jb install
@mixtool generate all --output-alerts $(MIXIN_OUT_PATH_SSD)/alerts.yaml --output-rules $(MIXIN_OUT_PATH_SSD)/rules.yaml --directory $(MIXIN_OUT_PATH_SSD)/dashboards ${MIXIN_PATH}/mixin-ssd.libsonnet
endif

loki-mixin-check: loki-mixin
@echo "Checking diff"
@git diff --exit-code -- $(MIXIN_OUT_PATH) || (echo "Please build mixin by running 'make loki-mixin'" && false)
@git diff --exit-code -- $(MIXIN_OUT_PATH_SSD) || (echo "Please build mixin by running 'make loki-mixin'" && false)

###############
# Migrate #
Expand Down
41 changes: 41 additions & 0 deletions production/loki-mixin-compiled-ssd/alerts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
groups:
- name: loki_alerts
rules:
- alert: LokiRequestErrors
annotations:
message: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors.
expr: |
100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route)
/
sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)
> 10
for: 15m
labels:
severity: critical
- alert: LokiRequestPanics
annotations:
message: |
{{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics.
expr: |
sum(increase(loki_panic_total[10m])) by (namespace, job) > 0
labels:
severity: critical
- alert: LokiRequestLatency
annotations:
message: |
{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
expr: |
namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*"} > 1
for: 15m
labels:
severity: critical
- alert: LokiTooManyCompactorsRunning
annotations:
message: |
{{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time.
expr: |
sum(loki_boltdb_shipper_compactor_running) by (namespace) > 1
for: 5m
labels:
severity: warning
Loading

0 comments on commit 7314d95

Please sign in to comment.