# Example alerting rules for DragonflyDB. They are provided as a starting point and are not guaranteed to be correct; use at your own risk.
# Some rules need the `dragonfly_cluster` label in order to work; the `relabelings` section of the PodMonitor computes this label.
---
# PodMonitor: scrapes the `admin` port of every pod labelled
# app.kubernetes.io/name=dragonfly in the `caches` namespace.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: monitor
  namespace: caches
spec:
  namespaceSelector:
    matchNames:
      - caches
  selector:
    matchLabels:
      app.kubernetes.io/name: dragonfly
  podMetricsEndpoints:
    - port: admin
      metricRelabelings:
        # Keep only the series the alerting rules read; everything else is
        # dropped. Every metric used in a rule expression has to be listed
        # here -- dragonfly_max_clients is the denominator of the
        # DragonflyTooManyConnections rule.
        # The regex must stay on a single line: a folded scalar turns each
        # line break into a space, which would corrupt the pattern.
        - action: keep
          regex: >-
            (dragonfly_master|dragonfly_blocked_clients|dragonfly_connected_clients|dragonfly_max_clients|dragonfly_memory_used_bytes|dragonfly_memory_max_bytes|dragonfly_connected_replica_lag_records)
          sourceLabels:
            - __name__
      relabelings:
        # Derive dragonfly_cluster from the pod name by cutting off the last
        # "-"-separated segment: the first group is greedy, so it captures
        # everything up to the final dash
        # (e.g. "dragonfly-sample-0" -> "dragonfly-sample").
        # A pod name without any dash does not match, so no label is set.
        # No `separator` is given: it is only used to join the values of
        # several source labels, and there is a single one here.
        - sourceLabels: [pod]
          regex: '(.*)-(.*)'
          targetLabel: dragonfly_cluster
          replacement: '$1'
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: dragonfly
  namespace: caches
spec:
  groups:
    - name: dragonfly
      rules:
        # Cluster-level rules. Summing dragonfly_master while dropping the
        # per-pod labels leaves one series per remaining label set
        # (namespace, dragonfly_cluster, ...), so these two rules depend on
        # the dragonfly_cluster label being present on the scraped series.
        # dragonfly_master is treated as 1 on a master (see the `== 1` test
        # in DragonflyNoConnections).
        # NOTE(review): when no dragonfly_master series exists at all, sum()
        # returns nothing and DragonflyClusterHasNoMaster stays silent.
        - alert: DragonflyClusterHasNoMaster
          expr: sum(dragonfly_master) without (pod, instance, container, job, endpoint) < 1
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: >-
              Dragonfly cluster
              {{ $labels.namespace }}/{{ $labels.dragonfly_cluster }}
              has no master!
        - alert: DragonflyClusterHasMultipleMasters
          expr: sum(dragonfly_master) without (pod, instance, container, job, endpoint) > 1
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: >-
              Dragonfly cluster
              {{ $labels.namespace }}/{{ $labels.dragonfly_cluster }}
              has multiple masters!
        # Fires when the number of blocked clients went up during the last
        # minute. delta() is used instead of increase(): increase() is meant
        # for counters and treats every drop in value as a counter reset, so
        # on a gauge it also reports a positive result when the value merely
        # falls to a non-zero number.
        # NOTE(review): assumes dragonfly_blocked_clients is a gauge holding
        # the current count -- confirm against the exporter.
        - alert: DragonflyBlockingConnections
          expr: delta(dragonfly_blocked_clients[1m]) > 0
          for: 0m
          labels:
            severity: warning
          annotations:
            summary: >-
              Dragonfly instance
              {{ $labels.namespace }}/{{ $labels.dragonfly_cluster }}/{{ $labels.pod }}
              blocked some connections.
        # Only evaluated for instances that report dragonfly_master == 1.
        # NOTE(review): the threshold is "fewer than 4", not zero; presumably
        # it allows for replica/monitoring connections -- confirm.
        - alert: DragonflyNoConnections
          expr: dragonfly_connected_clients < 4 and dragonfly_master == 1
          for: 0m
          labels:
            severity: warning
          annotations:
            summary: >-
              Dragonfly master instance
              {{ $labels.namespace }}/{{ $labels.dragonfly_cluster }}/{{ $labels.pod }}
              has no connections.
        # Used memory above 90 % of the configured maximum.
        - alert: DragonflyOutOfMemory
          expr: dragonfly_memory_used_bytes / dragonfly_memory_max_bytes * 100 > 90
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: >-
              Dragonfly instance
              {{ $labels.namespace }}/{{ $labels.dragonfly_cluster }}/{{ $labels.pod }}
              is running out of memory!
        # Replica lag above 100 records, sustained for 5 minutes.
        - alert: DragonflyReplicaLagging
          expr: dragonfly_connected_replica_lag_records > 100
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: >-
              Dragonfly instance
              {{ $labels.namespace }}/{{ $labels.dragonfly_cluster }}/{{ $labels.pod }}
              is lagging behind master!
        # Connected clients above 90 % of the client limit for 2 minutes.
        # dragonfly_max_clients has to pass the scrape's metricRelabelings
        # keep filter; without it the division has no right-hand side and
        # this rule never fires.
        - alert: DragonflyTooManyConnections
          expr: dragonfly_connected_clients / dragonfly_max_clients * 100 > 90
          for: 2m
          labels:
            severity: warning
          annotations:
            summary: >-
              Dragonfly too many connections
              {{ $labels.namespace }}/{{ $labels.dragonfly_cluster }}/{{ $labels.pod }}