diff --git a/.gitignore b/.gitignore
index 63b60f63..9c5bb3b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,3 +84,4 @@ certs.yaml
 *.log
 site/
 cacerts.yaml
+src/terraform/.terraform.lock.hcl
diff --git a/src/terraform/main.tf b/src/terraform/main.tf
new file mode 100644
index 00000000..7e03f5e8
--- /dev/null
+++ b/src/terraform/main.tf
@@ -0,0 +1,153 @@
+# Root module. Creates the namespace named by var.namespace and conditionally
+# installs each component: every install_*/patch_* flag maps to count = 0 or 1,
+# so an enabled module instance is addressed as module.<name>[0] and the
+# matching URL outputs evaluate to null when the component is disabled.
+
+resource "kubernetes_namespace" "infrastructure" {
+  metadata {
+    name = var.namespace
+  }
+}
+
+module "consul" {
+  source      = "./modules/consul"
+  count       = var.install_consul == true ? 1 : 0
+  domain-name = "consul.${var.namespace}.${var.domain-name}"
+  namespace   = var.namespace
+  datacenter  = var.consul-datacenter
+  depends_on = [
+    kubernetes_namespace.infrastructure
+  ]
+}
+
+output "consul-url" {
+  value = var.install_consul == true ? module.consul[0].consul-url : null
+}
+
+module "cert-manager" {
+  count  = var.install_cert_manager == true ? 1 : 0
+  source = "./modules/cert-manager"
+}
+
+# Patches the CoreDNS ConfigMap; ordered after Consul because the module reads the Consul DNS service.
+module "consul-coredns" {
+  source    = "./modules/consul-coredns"
+  count     = var.patch_coredns == true ? 1 : 0
+  namespace = var.namespace
+  depends_on = [
+    module.consul
+  ]
+}
+
+module "traefik" {
+  source              = "./modules/traefik"
+  count               = var.install_traefik == true ? 1 : 0
+  domain-name         = var.domain-name
+  traefik-domain-name = "traefik.${var.namespace}.${var.domain-name}"
+  service-ip          = var.service-ip
+  namespace           = var.namespace
+  depends_on = [
+    module.consul,
+    module.cert-manager
+  ]
+}
+
+output "traefik-url" {
+  value = var.install_traefik == true ? module.traefik[0].traefik-url : null
+}
+
+module "prometheus" {
+  source                 = "./modules/prometheus"
+  count                  = var.install_prometheus == true ? 1 : 0
+  prometheus-domain-name = "prometheus.${var.namespace}.${var.domain-name}"
+  grafana-domain-name    = "grafana.${var.namespace}.${var.domain-name}"
+  namespace              = var.namespace
+  depends_on = [
+    module.consul,
+    module.cert-manager
+  ]
+}
+
+output "prometheus-url" {
+  value = var.install_prometheus == true ? module.prometheus[0].prometheus-url : null
+}
+
+output "grafana-url" {
+  value = var.install_prometheus == true ? module.prometheus[0].grafana-url : null
+}
+
+module "loki" {
+  source    = "./modules/loki"
+  count     = var.install_loki == true ? 1 : 0
+  namespace = var.namespace
+  depends_on = [
+    module.consul,
+    module.cert-manager
+  ]
+}
+
+module "jaeger" {
+  source             = "./modules/jaeger"
+  count              = var.install_jaeger == true ? 1 : 0
+  namespace          = var.namespace
+  jaeger-domain-name = "jaeger.${var.namespace}.${var.domain-name}"
+  depends_on = [
+    module.consul,
+    module.cert-manager
+  ]
+}
+
+output "jaeger-url" {
+  value = var.install_jaeger == true ? module.jaeger[0].jaeger-url : null
+}
+
+module "elasticsearch" {
+  source              = "./modules/elasticsearch"
+  count               = var.install_elasticsearch == true ? 1 : 0
+  namespace           = var.namespace
+  elastic-domain-name = "es.${var.namespace}.${var.domain-name}"
+  kibana-domain-name  = "kibana.${var.namespace}.${var.domain-name}"
+  depends_on = [
+    module.consul,
+    module.cert-manager
+  ]
+}
+
+output "elastic-url" {
+  value = var.install_elasticsearch == true ? module.elasticsearch[0].elastic-url : null
+}
+
+output "kibana-url" {
+  value = var.install_elasticsearch == true ? module.elasticsearch[0].kibana-url : null
+}
+
+# Value of the module's elastic-user output; marked sensitive so Terraform redacts it in CLI output.
+output "elasticsearch-user" {
+  value     = var.install_elasticsearch == true ? module.elasticsearch[0].elastic-user : null
+  sensitive = true
+}
+
+module "identityserver4" {
+  source            = "./modules/identityserver4admin"
+  count             = var.install_identityserver4admin == true ? 1 : 0
+  namespace         = var.namespace
+  login-domain-name = "login.${var.domain-name}"
+  admin-domain-name = "admin.login.${var.domain-name}"
+  api-domain-name   = "api.login.${var.domain-name}"
+  depends_on = [
+    module.consul,
+    module.cert-manager
+  ]
+}
+
+output "login-url" {
+  value = var.install_identityserver4admin == true ? module.identityserver4[0].login-url : null
+}
+
+output "login-admin-url" {
+  value = var.install_identityserver4admin == true ? module.identityserver4[0].admin-url : null
+}
+
+output "login-api-url" {
+  value = var.install_identityserver4admin == true ? module.identityserver4[0].api-url : null
+}
\ No newline at end of file
diff --git a/src/terraform/modules/cert-manager/cluster-issuer.yaml b/src/terraform/modules/cert-manager/cluster-issuer.yaml
new file mode 100644
index 00000000..d7ef2a9d
--- /dev/null
+++ b/src/terraform/modules/cert-manager/cluster-issuer.yaml
@@ -0,0 +1,7 @@
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+  name: selfsigned-ca-issuer
+spec:
+  ca:
+    secretName: ca-key-pair
\ No newline at end of file
diff --git a/src/terraform/modules/cert-manager/main.tf b/src/terraform/modules/cert-manager/main.tf
new file mode 100644
index 00000000..63cd7b9e
--- /dev/null
+++ b/src/terraform/modules/cert-manager/main.tf
@@ -0,0 +1,60 @@
+resource "kubernetes_namespace" "cert-manager" {
+  metadata {
+    name = "cert-manager"
+  }
+}
+
+
+resource "helm_release" "cert-manager" {
+  name = "cert-manager"
+
+  repository    = "https://charts.jetstack.io"
+  chart         = "cert-manager"
+  namespace     = kubernetes_namespace.cert-manager.metadata[0].name
+  version       = "v1.3.1"
+  wait          = true
+  wait_for_jobs = true
+  set {
+    name  = "installCRDs"
+    value = "true"
+  }
+
+  depends_on = [
+    kubernetes_namespace.cert-manager
+  ]
+}
+
+# CA key pair referenced by the ClusterIssuer in cluster-issuer.yaml (secretName
+# ca-key-pair). The namespace is read from the namespace resource so Terraform
+# creates the namespace before it creates the secret.
+resource "kubernetes_secret" "ca-key-pair" {
+  metadata {
+    name      = "ca-key-pair"
+    namespace = kubernetes_namespace.cert-manager.metadata[0].name
+  }
+  data = {
+    # NOTE(review): "./certs" is resolved against the directory Terraform runs
+    # in, not against this module's directory — confirm that is intended.
+    "tls.crt" = file("./certs/cacerts.crt")
+    "tls.key" = file("./certs/cacerts.key")
+  }
+  type = "kubernetes.io/tls"
+}
+
+# Fixed pause between the chart/secret and the ClusterIssuer; presumably gives
+# the freshly installed cert-manager time to become ready — TODO confirm.
+resource "time_sleep" "wait_10_seconds" {
+  depends_on = [
+    helm_release.cert-manager,
+    kubernetes_secret.ca-key-pair
+  ]
+
+  create_duration = "10s"
+}
+
+resource "kubectl_manifest" "cluster-issuer" {
+  depends_on = [
+    time_sleep.wait_10_seconds
+  ]
+  yaml_body = file("${path.module}/cluster-issuer.yaml")
+}
diff --git a/src/terraform/modules/cert-manager/providers.tf b/src/terraform/modules/cert-manager/providers.tf
new file mode 100644
index
00000000..dccdca06
--- /dev/null
+++ b/src/terraform/modules/cert-manager/providers.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_version = ">= 0.13"
+  required_providers {
+    kubectl = {
+      source  = "gavinbunney/kubectl"
+      version = ">= 1.7.0"
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/terraform/modules/consul-coredns/coredns-orig.yaml b/src/terraform/modules/consul-coredns/coredns-orig.yaml
new file mode 100644
index 00000000..4a58f813
--- /dev/null
+++ b/src/terraform/modules/consul-coredns/coredns-orig.yaml
@@ -0,0 +1,27 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: coredns
+  namespace: kube-system
+data:
+  Corefile: |
+    .:53 {
+        errors
+        health {
+          lameduck 5s
+        }
+        ready
+        kubernetes cluster.local in-addr.arpa ip6.arpa {
+          pods insecure
+          fallthrough in-addr.arpa ip6.arpa
+          ttl 30
+        }
+        prometheus :9153
+        forward . /etc/resolv.conf {
+          max_concurrent 1000
+        }
+        cache 30
+        loop
+        reload
+        loadbalance
+    }
diff --git a/src/terraform/modules/consul-coredns/coredns.yaml b/src/terraform/modules/consul-coredns/coredns.yaml
new file mode 100644
index 00000000..637a0b87
--- /dev/null
+++ b/src/terraform/modules/consul-coredns/coredns.yaml
@@ -0,0 +1,38 @@
+# Template for the kube-system/coredns ConfigMap: adds a consul:53 server block that
+# forwards to the Consul DNS service and rewrites the login hosts to in-cluster services.
+# NOTE(review): the login.k8s.local host names are hard-coded — confirm they match the configured domain.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: coredns
+  namespace: kube-system
+data:
+  Corefile: |
+    consul:53 {
+        log
+        errors
+        cache 30
+        forward . ${consul_ip}
+    }
+    .:53 {
+        errors
+        health {
+          lameduck 5s
+        }
+        rewrite name login.k8s.local identityserver4-identity.${namespace}.svc.cluster.local
+        rewrite name admin.login.k8s.local identityserver4-admin.${namespace}.svc.cluster.local
+        ready
+        kubernetes cluster.local in-addr.arpa ip6.arpa {
+          pods insecure
+          fallthrough in-addr.arpa ip6.arpa
+          ttl 30
+        }
+        prometheus :9153
+        forward . /etc/resolv.conf {
+          max_concurrent 1000
+        }
+        cache 30
+        loop
+        reload
+        loadbalance
+    }
diff --git a/src/terraform/modules/consul-coredns/main.tf b/src/terraform/modules/consul-coredns/main.tf
new file mode 100644
index 00000000..9047357f
--- /dev/null
+++ b/src/terraform/modules/consul-coredns/main.tf
@@ -0,0 +1,21 @@
+# Looks up the Consul DNS service so its cluster IP can be used as the CoreDNS forward target.
+data "kubernetes_service" "consul-dns" {
+  metadata {
+    name      = "consul-consul-dns"
+    namespace = var.namespace
+  }
+}
+
+# Applies the kube-system/coredns ConfigMap rendered from coredns.yaml.
+resource "kubectl_manifest" "coredns" {
+  yaml_body = templatefile("${path.module}/coredns.yaml", {
+    consul_ip = data.kubernetes_service.consul-dns.spec[0].cluster_ip
+    namespace = var.namespace
+  })
+  // yaml_body = templatefile("${path.module}/coredns-orig.yaml", {
+  //   consul_ip = var.consul_ip
+  // })
+  lifecycle {
+    prevent_destroy = false
+  }
+}
diff --git a/src/terraform/modules/consul-coredns/providers.tf b/src/terraform/modules/consul-coredns/providers.tf
new file mode 100644
index 00000000..99459e5a
--- /dev/null
+++ b/src/terraform/modules/consul-coredns/providers.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_version = ">= 0.13"
+  required_providers {
+    kubectl = {
+      source  = "gavinbunney/kubectl"
+      version = ">= 1.7.0"
+    }
+  }
+}
diff --git a/src/terraform/modules/consul-coredns/variables.tf b/src/terraform/modules/consul-coredns/variables.tf
new file mode 100644
index 00000000..779d2cf5
--- /dev/null
+++ b/src/terraform/modules/consul-coredns/variables.tf
@@ -0,0 +1,3 @@
+variable "namespace" {
+  type = string
+}
diff --git a/src/terraform/modules/consul/consul-values.yaml b/src/terraform/modules/consul/consul-values.yaml
new file mode 100644
index 00000000..2f2d6ace
--- /dev/null
+++ b/src/terraform/modules/consul/consul-values.yaml
@@ -0,0 +1,86 @@
+# Choose an optional name for the datacenter
+global:
+  datacenter: ${datacenter}
+  tls:
+    enabled: true
+    verify: false
+  acls:
+    manageSystemACLs: true
+
+# Enable the Consul Web UI, exposed through the ingress below (the NodePort service is commented out)
+ui:
+  enabled: true
+  # service:
+  #   enabled: true
+  #   type: 'NodePort'
+  ingress:
+    enabled: true
+    hosts:
+      - host: ${domain}
+        paths:
+          - /
+        passHostHeader: true
+        backend:
+          serviceName: consul-consul-ui
+          servicePort: 443
+    tls:
+      - hosts:
+          - ${domain}
+        secretName: traefik-cert
+    annotations: |
+      traefik.ingress.kubernetes.io/router.entrypoints: web, websecure
+      traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd
+
+# Enable Connect for secure communication between nodes
+connectInject:
+  enabled: true
+  aclBindingRuleSelector: "" # https://github.com/hashicorp/consul-helm/issues/242
+# Enable CRD Controller
+controller:
+  enabled: true
+
+# ingressGateways:
+#   enabled: true
+#   defaults:
+#     replicas: 1 #default = 2, but they cannot be on the same node. there is only one node on docker-desktop
+#     affinity: null
+#     service:
+#       type: 'NodePort'
+#       ports:
+#         - port: 8080
+#           nodePort: null
+#         - port: 8443
+#           nodePort: null
+#   gateways:
+#     - name: ingress-gateway
+#       service:
+#         type: LoadBalancer
+
+# client:
+#   affinity: |
+#     nodeAffinity:
+#       requiredDuringSchedulingIgnoredDuringExecution:
+#         nodeSelectorTerms:
+#         - matchExpressions:
+#           - key: node-role.kubernetes.io/master
+#             operator: DoesNotExist
+
+# Use only one Consul server for local development
+server:
+  replicas: 1
+  bootstrapExpect: 1
+  disruptionBudget:
+    enabled: true
+    maxUnavailable: 0
+  updatePartition: 0
+  # extraConfig: |
+  #   {
+  #     "ui_config": {
+  #       "content_path": "/consul"
+  #     }
+  #   }
+
+# syncCatalog:
+#   enabled: true
+#   k8sAllowNamespaces: ['webshop']
+#   k8sDenyNamespaces: ['kube-system', 'kube-public']
\ No newline at end of file
diff --git a/src/terraform/modules/consul/main.tf b/src/terraform/modules/consul/main.tf
new file mode 100644
index 00000000..6ad1cfa5
--- /dev/null
+++ b/src/terraform/modules/consul/main.tf
@@ -0,0 +1,21 @@
+# Installs the HashiCorp Consul Helm chart with values rendered from consul-values.yaml.
+resource "helm_release" "consul" {
+  name = "consul"
+
+  repository    = "https://helm.releases.hashicorp.com"
+  chart         = "consul"
+  namespace     = var.namespace
+  version       = "0.31.1"
+  wait          = true
+  wait_for_jobs = true
+  values = [
+    templatefile("${path.module}/consul-values.yaml", {
+      domain     = var.domain-name,
+      datacenter = var.datacenter
+    })
+  ]
+}
+
+output "consul-url" {
+  value = "https://${var.domain-name}"
+}
\ No newline at end of file
diff --git a/src/terraform/modules/consul/variables.tf b/src/terraform/modules/consul/variables.tf
new file mode 100644
index 00000000..25527c65
--- /dev/null
+++ b/src/terraform/modules/consul/variables.tf
@@ -0,0 +1,11 @@
+variable "domain-name" {
+  type = string
+}
+
+variable "namespace" {
+  type = string
+}
+
+variable "datacenter" {
+  type = string
+}
\ No newline at end of file
diff --git a/src/terraform/modules/elasticsearch/crds/elasticsearch-ingress.yaml b/src/terraform/modules/elasticsearch/crds/elasticsearch-ingress.yaml
new file mode 100644
index 00000000..30041cd1
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/crds/elasticsearch-ingress.yaml
@@ -0,0 +1,24 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: elasticsearch
+  namespace: ${namespace}
+  annotations:
+    traefik.ingress.kubernetes.io/router.entrypoints: web, websecure
+    traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd
+spec:
+  rules:
+    - host: ${elastic-domain-name}
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: elastic-es-es-http
+                port:
+                  number: 9200
+  tls:
+    - secretName: traefik-cert
+      hosts:
+        - ${elastic-domain-name}
diff --git a/src/terraform/modules/elasticsearch/crds/elasticsearch-serviceaccount.yaml b/src/terraform/modules/elasticsearch/crds/elasticsearch-serviceaccount.yaml
new file mode 100644
index 00000000..37045930
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/crds/elasticsearch-serviceaccount.yaml
@@ -0,0 +1,6 @@
+# Service account for the Elasticsearch service (for ACL enforcement)
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: elastic-es
+  namespace: ${namespace}
diff --git a/src/terraform/modules/elasticsearch/crds/elasticsearch.yaml
b/src/terraform/modules/elasticsearch/crds/elasticsearch.yaml
new file mode 100644
index 00000000..786a3162
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/crds/elasticsearch.yaml
@@ -0,0 +1,25 @@
+apiVersion: elasticsearch.k8s.elastic.co/v1
+kind: Elasticsearch
+metadata:
+  name: elastic-es
+  namespace: ${namespace}
+spec:
+  version: 7.12.1
+  http:
+    tls:
+      selfSignedCertificate:
+        disabled: true
+  nodeSets:
+    - name: default
+      count: 1
+      config:
+        node.store.allow_mmap: false
+      podTemplate:
+        metadata:
+          annotations:
+            consul.hashicorp.com/connect-service: "elastic-es"
+            consul.hashicorp.com/connect-inject: "true"
+            consul.hashicorp.com/connect-service-port: "http"
+        spec:
+          automountServiceAccountToken: true
+          serviceAccount: elastic-es
diff --git a/src/terraform/modules/elasticsearch/crds/kibana-ingress.yaml b/src/terraform/modules/elasticsearch/crds/kibana-ingress.yaml
new file mode 100644
index 00000000..fcd09f82
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/crds/kibana-ingress.yaml
@@ -0,0 +1,24 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: kibana
+  namespace: ${namespace}
+  annotations:
+    traefik.ingress.kubernetes.io/router.entrypoints: web, websecure
+    traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd
+spec:
+  rules:
+    - host: ${kibana-domain-name}
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: elastic-kb-kb-http
+                port:
+                  number: 5601
+  tls:
+    - secretName: traefik-cert
+      hosts:
+        - ${kibana-domain-name}
diff --git a/src/terraform/modules/elasticsearch/crds/kibana-serviceaccount.yaml b/src/terraform/modules/elasticsearch/crds/kibana-serviceaccount.yaml
new file mode 100644
index 00000000..1ae915b9
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/crds/kibana-serviceaccount.yaml
@@ -0,0 +1,6 @@
+# Service account for the Kibana service (for ACL enforcement)
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: elastic-kb
+  namespace: ${namespace}
diff --git a/src/terraform/modules/elasticsearch/crds/kibana.yaml b/src/terraform/modules/elasticsearch/crds/kibana.yaml
new file mode 100644
index 00000000..1de8ac89
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/crds/kibana.yaml
@@ -0,0 +1,37 @@
+apiVersion: kibana.k8s.elastic.co/v1
+kind: Kibana
+metadata:
+  name: elastic-kb
+  namespace: ${namespace}
+spec:
+  version: 7.12.1
+  count: 1
+  http:
+    tls:
+      selfSignedCertificate:
+        disabled: true
+  elasticsearchRef: # This connection does not go through the mesh
+    name: elastic-es
+  # config:
+  #   elasticsearch.hosts:
+  #     - http://127.0.0.1:9200
+  #   elasticsearch.username: elastic
+  #   elasticsearch.ssl.verificationMode: none
+  podTemplate:
+    metadata:
+      annotations:
+        consul.hashicorp.com/connect-service: "elastic-kb"
+        consul.hashicorp.com/connect-inject: "true"
+        consul.hashicorp.com/connect-service-port: "http"
+        # consul.hashicorp.com/connect-service-upstreams: "elastic-es:9200"
+    spec:
+      automountServiceAccountToken: true
+      serviceAccount: elastic-kb
+      # containers:
+      #   - name: kibana
+      #     env:
+      #       - name: ELASTICSEARCH_PASSWORD
+      #         valueFrom:
+      #           secretKeyRef:
+      #             name: elastic-es-es-elastic-user
+      #             key: elastic
diff --git a/src/terraform/modules/elasticsearch/eck-values.yaml b/src/terraform/modules/elasticsearch/eck-values.yaml
new file mode 100644
index 00000000..e69de29b
diff --git a/src/terraform/modules/elasticsearch/main.tf b/src/terraform/modules/elasticsearch/main.tf
new file mode 100644
index 00000000..21051550
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/main.tf
@@ -0,0 +1,95 @@
+# Installs the ECK operator chart; eck-values.yaml is rendered as the chart values.
+resource "helm_release" "elastic-operator" {
+  name = "elastic-operator"
+
+  repository    = "https://helm.elastic.co"
+  chart         = "eck-operator"
+  namespace     = var.namespace
+  version       = "1.6.0"
+  wait          = true
+  wait_for_jobs = true
+  values = [
+    templatefile("${path.module}/eck-values.yaml", {
+    })
+  ]
+}
+
+resource "kubectl_manifest" "elastic-serviceaccount" {
+  yaml_body = templatefile("${path.module}/crds/elasticsearch-serviceaccount.yaml", {
+    namespace = var.namespace
+  })
+}
+
+resource "kubectl_manifest" "kibana-serviceaccount" {
+  yaml_body = templatefile("${path.module}/crds/kibana-serviceaccount.yaml", {
+    namespace = var.namespace
+  })
+}
+
+
+resource "kubectl_manifest" "elasticsearch" {
+  depends_on = [
+    helm_release.elastic-operator,
+    kubectl_manifest.elastic-serviceaccount,
+  ]
+  yaml_body = templatefile("${path.module}/crds/elasticsearch.yaml", {
+    namespace = var.namespace
+  })
+}
+
+resource "kubectl_manifest" "elasticsearch-ingress" {
+  depends_on = [
+    helm_release.elastic-operator,
+  ]
+  yaml_body = templatefile("${path.module}/crds/elasticsearch-ingress.yaml", {
+    namespace           = var.namespace
+    elastic-domain-name = var.elastic-domain-name
+  })
+}
+
+resource "kubectl_manifest" "kibana" {
+  depends_on = [
+    helm_release.elastic-operator,
+    kubectl_manifest.kibana-serviceaccount,
+  ]
+  yaml_body = templatefile("${path.module}/crds/kibana.yaml", {
+    namespace = var.namespace
+  })
+}
+
+resource "kubectl_manifest" "kibana-ingress" {
+  depends_on = [
+    helm_release.elastic-operator,
+  ]
+  yaml_body = templatefile("${path.module}/crds/kibana-ingress.yaml", {
+    namespace          = var.namespace
+    kibana-domain-name = var.kibana-domain-name
+  })
+}
+
+# The <cluster>-es-elastic-user secret is generated by the operator for the
+# Elasticsearch resource, so the read must wait for that manifest, not just the
+# operator release. NOTE(review): creation is asynchronous — the first apply may still race.
+data "kubernetes_secret" "elastic-user" {
+  metadata {
+    name      = "elastic-es-es-elastic-user"
+    namespace = var.namespace
+  }
+  depends_on = [
+    helm_release.elastic-operator,
+    kubectl_manifest.elasticsearch,
+  ]
+}
+
+output "elastic-user" {
+  value     = data.kubernetes_secret.elastic-user.data.elastic
+  sensitive = true
+}
+
+output "elastic-url" {
+  value = "https://${var.elastic-domain-name}"
+}
+
+output "kibana-url" {
+  value = "https://${var.kibana-domain-name}"
+}
\ No newline at end of file
diff --git a/src/terraform/modules/elasticsearch/providers.tf b/src/terraform/modules/elasticsearch/providers.tf
new file mode 100644
index 00000000..99459e5a
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/providers.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_version = ">= 0.13"
+  required_providers
{
+    kubectl = {
+      source  = "gavinbunney/kubectl"
+      version = ">= 1.7.0"
+    }
+  }
+}
diff --git a/src/terraform/modules/elasticsearch/variables.tf b/src/terraform/modules/elasticsearch/variables.tf
new file mode 100644
index 00000000..32d4f8a2
--- /dev/null
+++ b/src/terraform/modules/elasticsearch/variables.tf
@@ -0,0 +1,11 @@
+variable "elastic-domain-name" {
+  type = string
+}
+
+variable "kibana-domain-name" {
+  type = string
+}
+
+variable "namespace" {
+  type = string
+}
diff --git a/src/terraform/modules/identityserver4admin/identityserver4admin-values.yaml b/src/terraform/modules/identityserver4admin/identityserver4admin-values.yaml
new file mode 100644
index 00000000..de3ebf02
--- /dev/null
+++ b/src/terraform/modules/identityserver4admin/identityserver4admin-values.yaml
@@ -0,0 +1,105 @@
+# NOTE(review): the default admin credentials below are committed in plain text;
+# presumably intended for local development only — confirm before reuse.
+seed:
+  defaultAdmin:
+    password: Password_123
+    email: your.email@gmail.com
+
+certificates:
+  certManager:
+    enabled: true
+    issuerRef:
+      name: selfsigned-ca-issuer
+      kind: ClusterIssuer
+
+admin:
+  protocol: https
+  domainName: ${admin-domain-name}
+  ssl:
+    enabled: true
+    secretName: identityserver4-cert-admin
+  service:
+    type: ClusterIP
+    port: 443
+  podAnnotations:
+    "consul.hashicorp.com/connect-inject": "true"
+    "consul.hashicorp.com/connect-service": "identityserver4-identityserver4admin-admin"
+  ingress:
+    enabled: true
+    annotations:
+      traefik.ingress.kubernetes.io/router.entrypoints: web, websecure
+      traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd
+      # kubernetes.io/ingress.class: nginx
+      # kubernetes.io/tls-acme: "true"
+    hosts:
+      - host: ${admin-domain-name}
+        paths:
+          - path: /
+            backend:
+              serviceName: identityserver4-admin
+              servicePort: 443
+    tls:
+      - secretName: traefik-cert
+        hosts:
+          - ${admin-domain-name}
+identity:
+  protocol: https
+  domainName: ${login-domain-name}
+  ssl:
+    enabled: true
+    secretName: identityserver4-cert-identity
+  service:
+    type: ClusterIP
+    port: 443
+  podAnnotations:
+    "consul.hashicorp.com/connect-inject": "true"
+    "consul.hashicorp.com/connect-service": "identityserver4-identityserver4admin-identity"
+  ingress:
+    enabled: true
+    annotations:
+      traefik.ingress.kubernetes.io/router.entrypoints: web, websecure
+      traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd
+      # kubernetes.io/ingress.class: nginx
+      # kubernetes.io/tls-acme: "true"
+    hosts:
+      - host: ${login-domain-name}
+        paths:
+          - path: /
+            backend:
+              serviceName: identityserver4-identity
+              servicePort: 443
+    tls:
+      - secretName: traefik-cert
+        hosts:
+          - ${login-domain-name}
+api:
+  enabled: true
+  protocol: https
+  domainName: ${api-domain-name}
+  ssl:
+    enabled: true
+    secretName: identityserver4-cert-api
+  service:
+    type: ClusterIP
+    port: 443
+  podAnnotations:
+    "consul.hashicorp.com/connect-inject": "true"
+    "consul.hashicorp.com/connect-service": "identityserver4-identityserver4admin-api"
+  ingress:
+    enabled: true
+    annotations:
+      traefik.ingress.kubernetes.io/router.entrypoints: web, websecure
+      traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd
+      # kubernetes.io/ingress.class: nginx
+      # kubernetes.io/tls-acme: "true"
+    hosts:
+      - host: ${api-domain-name}
+        paths:
+          - path: /
+            backend:
+              serviceName: identityserver4-api
+              servicePort: 443
+    tls:
+      - secretName: traefik-cert
+        hosts:
+          - ${api-domain-name}
diff --git a/src/terraform/modules/identityserver4admin/main.tf b/src/terraform/modules/identityserver4admin/main.tf
new file mode 100644
index 00000000..adee350d
--- /dev/null
+++ b/src/terraform/modules/identityserver4admin/main.tf
@@ -0,0 +1,26 @@
+# Installs the identityserver4admin chart with values rendered from identityserver4admin-values.yaml.
+resource "helm_release" "identityserver4" {
+  name = "identityserver4"
+
+  repository = "https://bravecobra.github.io/identityserver4.admin-helm/charts/"
+  chart      = "identityserver4admin"
+  namespace  = var.namespace
+  version    = "0.4.0"
+  values = [
+    templatefile("${path.module}/identityserver4admin-values.yaml", {
+      admin-domain-name = var.admin-domain-name,
+      login-domain-name = var.login-domain-name,
+      api-domain-name   = var.api-domain-name
+    })
+  ]
+}
+
+output "login-url" {
+  value = "https://${var.login-domain-name}"
+}
+output "admin-url" {
+  value = "https://${var.admin-domain-name}"
+}
+output "api-url" {
+  value = "https://${var.api-domain-name}"
+}
\ No newline at end of file
diff --git a/src/terraform/modules/identityserver4admin/providers.tf b/src/terraform/modules/identityserver4admin/providers.tf
new file mode 100644
index 00000000..99459e5a
--- /dev/null
+++ b/src/terraform/modules/identityserver4admin/providers.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_version = ">= 0.13"
+  required_providers {
+    kubectl = {
+      source  = "gavinbunney/kubectl"
+      version = ">= 1.7.0"
+    }
+  }
+}
diff --git a/src/terraform/modules/identityserver4admin/variables.tf b/src/terraform/modules/identityserver4admin/variables.tf
new file mode 100644
index 00000000..0fce2510
--- /dev/null
+++ b/src/terraform/modules/identityserver4admin/variables.tf
@@ -0,0 +1,15 @@
+variable "admin-domain-name" {
+  type = string
+}
+
+variable "login-domain-name" {
+  type = string
+}
+
+variable "api-domain-name" {
+  type = string
+}
+
+variable "namespace" {
+  type = string
+}
diff --git a/src/terraform/modules/jaeger/crds/all-in-one.yaml b/src/terraform/modules/jaeger/crds/all-in-one.yaml
new file mode 100644
index 00000000..79b24c5f
--- /dev/null
+++ b/src/terraform/modules/jaeger/crds/all-in-one.yaml
@@ -0,0 +1,34 @@
+apiVersion: jaegertracing.io/v1
+kind: Jaeger
+metadata:
+  name: jaeger
+  namespace: ${namespace}
+spec:
+  annotations:
+    "consul.hashicorp.com/connect-inject": "true"
+    "consul.hashicorp.com/connect-service-port": "6831"
+    "consul.hashicorp.com/transparent-proxy": "true"
+  ingress:
+    enabled: true
+    annotations:
+      traefik.ingress.kubernetes.io/router.entrypoints: web, websecure
+      traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd
+    hosts:
+      - ${domain-name}
+    path: /
+
tls: + - secretName: traefik-cert + hosts: + - ${domain-name} + allInOne: + options: + query: + base-path: /jaeger +agent: + podAnnotations: + "consul.hashicorp.com/connect-inject": "true" + "consul.hashicorp.com/transparent-proxy": "true" +query: + podAnnotations: + "consul.hashicorp.com/connect-inject": "true" + "consul.hashicorp.com/transparent-proxy": "true" diff --git a/src/terraform/modules/jaeger/crds/jaeger-admin-service.yaml b/src/terraform/modules/jaeger/crds/jaeger-admin-service.yaml new file mode 100644 index 00000000..f65c3a63 --- /dev/null +++ b/src/terraform/modules/jaeger/crds/jaeger-admin-service.yaml @@ -0,0 +1,18 @@ +kind: Service +apiVersion: v1 +metadata: + name: jaeger-admin + labels: + app: jaeger + namespace: ${namespace} +spec: + selector: + app: jaeger + app.kubernetes.io/component: all-in-one + app.kubernetes.io/instance: jaeger + app.kubernetes.io/managed-by: jaeger-operator + app.kubernetes.io/name: jaeger + app.kubernetes.io/part-of: jaeger + ports: + - name: admin-http + port: 14269 diff --git a/src/terraform/modules/jaeger/crds/jaeger-grafana-dashboard.json b/src/terraform/modules/jaeger/crds/jaeger-grafana-dashboard.json new file mode 100644 index 00000000..d2ad603e --- /dev/null +++ b/src/terraform/modules/jaeger/crds/jaeger-grafana-dashboard.json @@ -0,0 +1,2669 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dashboard for monitoring jaeger running in a k8s environment. Works with 1.9+. Feedbacks? 
Please send to luong.vo@employmenthero.com", + "editable": false, + "gnetId": 10001, + "graphTooltip": 0, + "id": 30, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 32, + "panels": [], + "title": "Jaeger Collector", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_collector_spans_received_total[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Spans Received/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": 
false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 1 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jaeger_collector_in_queue_latency_sum / jaeger_collector_in_queue_latency_count", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average in-queue latency (sec)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 1 + }, + "hiddenSeries": false, + "id": 42, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jaeger_collector_batch_size", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Batch Size (spans)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jaeger_collector_queue_length", + "format": "time_series", + "hide": false, + 
"intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Queue Length (Spans)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 8 + }, + "hiddenSeries": false, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(jaeger_collector_spans_dropped_total[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Spans Dropped/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": 
null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 8 + }, + "hiddenSeries": false, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "jaeger_collector_save_latency_sum / jaeger_collector_save_latency_count", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Span save latency (sec)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 30, + "panels": [], + "repeat": null, + 
"title": "Jaeger Agent", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 16 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_thrift_udp_server_packets_processed_total[5m])) by (protocol)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_thrift_udp_server_packets_processed/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 16 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + 
"current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(jaeger_agent_thrift_udp_server_packet_size) by (protocol)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "avg(jaeger_agent_thrift_udp_server_packet_size)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 16 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + 
"stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(jaeger_agent_thrift_udp_server_queue_size) by (protocol)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_thrift_udp_server_queue_size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_thrift_udp_server_read_errors_total[5m])) by (protocol)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_thrift_udp_server_read_errors/sec", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 22 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_thrift_udp_server_packets_dropped_total[5m])) by (protocol)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_thrift_udp_server_packets_dropped/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], 
+ "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 22 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_thrift_udp_t_processor_handler_errors_total[5m])) by (protocol)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_thrift_udp_t_processor_handler_errors/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + 
"avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_reporter_batches_submitted_total[5m])) by (format)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_tchannel_reporter_batches_submitted/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 28 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, 
+ "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_reporter_spans_submitted_total[5m])) by (format)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_tchannel_reporter_spans_submitted/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 28 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(jaeger_agent_reporter_batch_size) by (format)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": 
"avg(jaeger_agent_tchannel_reporter_batch_size)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 34 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_http_server_requests_total[5m])) by (type)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_http_server_requests/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 34 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_reporter_batches_failures_total[5m])) by (format)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_tchannel_reporter_batches_failures/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 
8, + "x": 16, + "y": 34 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_reporter_spans_failures_total[5m])) by (format)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_tchannel_reporter_spans_failures/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 40 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_agent_http_server_errors_total[5m])) by (source, status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source}}.{{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "jaeger_agent_http_server_errors/sec", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 16, + "x": 8, + "y": 40 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(jaeger_agent_collector_proxy_total) by (endpoint,protocol,result)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + 
"timeRegions": [], + "timeShift": null, + "title": "total(jaeger_agent_collector_proxy)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 46, + "panels": [], + "title": "Jaeger Query", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 47 + }, + "hiddenSeries": false, + "id": 48, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(jaeger_rpc_request_latency_sum / jaeger_rpc_request_latency_count)*1000", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average request latency (milliseconds)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + 
"type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 54 + }, + "hiddenSeries": false, + "id": 50, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_rpc_http_requests_total[5m]))*60", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "All requests (/min)", + "refId": "A" + }, + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{status_code=~\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "2xx Requests (/min)", + "refId": "B" + }, + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{status_code!=\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Non-2xx Requests (/min)", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests/min (All requests)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 54 + }, + "hiddenSeries": false, + "id": 52, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/traces\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "All requests (/min)", + "refId": "A" + }, + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/traces\",status_code=~\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "2xx Requests (/min)", + "refId": "B" + }, + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/traces\",status_code!=\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Non-2xx Requests (/min)", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests/min 
(endpoint=\"/api/traces\")", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 54 + }, + "hiddenSeries": false, + "id": 54, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/traces/-traceID-\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "All requests (/min)", + "refId": "A" + }, + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/traces/-traceID-\",status_code=~\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "2xx Requests (/min)", + "refId": "B" + }, + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/traces/-traceID-\",status_code!=\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Non-2xx Requests (/min)", + 
"refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests/min (endpoint=\"/api/traces/-traceID-\")", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 61 + }, + "hiddenSeries": false, + "id": 56, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/services/-service-/operations\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "All requests (/min)", + "refId": "A" + }, + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/services/-service-/operations\",status_code=~\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "2xx Requests (/min)", + "refId": "B" + }, + { + "expr": 
"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/services/-service-/operations\",status_code!=\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Non-2xx Requests (/min)", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests/min (endpoint=\"/api/services/-service-/operations\")", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 61 + }, + "hiddenSeries": false, + "id": 58, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/services\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "All requests (/min)", + "refId": "A" + }, + { + "expr": 
"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/services\",status_code=~\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "2xx Requests (/min)", + "refId": "B" + }, + { + "expr": "sum(rate(jaeger_rpc_http_requests_total{endpoint=~\"/api/services\",status_code!=\"2xx\"}[5m]))*60", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Non-2xx Requests (/min)", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Requests/min (endpoint=\"/api/services\")", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "jaeger" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Jaeger Dashboard", + "uid": "zLOi95xmk", + "version": 2 + } \ No newline at end of file diff --git a/src/terraform/modules/jaeger/crds/jaeger-grafana-dashboard.yaml b/src/terraform/modules/jaeger/crds/jaeger-grafana-dashboard.yaml new file mode 100644 index 00000000..6b25d212 --- /dev/null +++ b/src/terraform/modules/jaeger/crds/jaeger-grafana-dashboard.yaml @@ -0,0 +1,913 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: jaeger-dashboard + namespace: 
${namespace} + labels: + grafana_dashboard: "1" + annotations: + k8s-sidecar-target-directory: /tmp/dashboards/Infrastructure +data: + jaeger-dashboard.json: "{\r\n \"annotations\": {\r\n \"list\": [\r\n {\r\n + \ \"builtIn\": 1,\r\n \"datasource\": \"-- Grafana --\",\r\n + \ \"enable\": true,\r\n \"hide\": true,\r\n \"iconColor\": + \"rgba(0, 211, 255, 1)\",\r\n \"name\": \"Annotations & Alerts\",\r\n + \ \"type\": \"dashboard\"\r\n }\r\n ]\r\n },\r\n \"description\": + \"Dashboard for monitoring jaeger running in a k8s environment. Works with 1.9+. + Feedbacks? Please send to luong.vo@employmenthero.com\",\r\n \"editable\": + false,\r\n \"gnetId\": 10001,\r\n \"graphTooltip\": 0,\r\n \"id\": 30,\r\n + \ \"links\": [],\r\n \"panels\": [\r\n {\r\n \"collapsed\": false,\r\n + \ \"datasource\": null,\r\n \"gridPos\": {\r\n \"h\": 1,\r\n + \ \"w\": 24,\r\n \"x\": 0,\r\n \"y\": 0\r\n },\r\n + \ \"id\": 32,\r\n \"panels\": [],\r\n \"title\": \"Jaeger Dashboard + Collector\",\r\n \"type\": \"row\"\r\n },\r\n {\r\n \"aliasColors\": + {},\r\n \"bars\": false,\r\n \"dashLength\": 10,\r\n \"dashes\": + false,\r\n \"datasource\": \"Prometheus\",\r\n \"fieldConfig\": + {\r\n \"defaults\": {},\r\n \"overrides\": []\r\n },\r\n + \ \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n + \ \"h\": 7,\r\n \"w\": 8,\r\n \"x\": 0,\r\n \"y\": + 1\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 34,\r\n + \ \"legend\": {\r\n \"avg\": false,\r\n \"current\": false,\r\n + \ \"max\": false,\r\n \"min\": false,\r\n \"show\": + true,\r\n \"total\": false,\r\n \"values\": false\r\n },\r\n + \ \"lines\": true,\r\n \"linewidth\": 1,\r\n \"links\": [],\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"paceLength\": 10,\r\n \"percentage\": false,\r\n + \ \"pluginVersion\": \"7.5.3\",\r\n \"pointradius\": 5,\r\n \"points\": + false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": [],\r\n + \ \"spaceLength\": 10,\r\n \"stack\": false,\r\n 
\"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"sum(rate(jaeger_collector_spans_received_total[5m]))\",\r\n + \ \"format\": \"time_series\",\r\n \"hide\": false,\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"\",\r\n \"refId\": \"A\"\r\n + \ }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Spans Received/sec\",\r\n \"tooltip\": {\r\n \"shared\": true,\r\n + \ \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n },\r\n + \ \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 7,\r\n \"w\": 8,\r\n + \ \"x\": 8,\r\n \"y\": 1\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 36,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n 
\"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"jaeger_collector_in_queue_latency_sum + / jaeger_collector_in_queue_latency_count\",\r\n \"format\": \"time_series\",\r\n + \ \"hide\": false,\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"\",\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Average in-queue latency (sec)\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 7,\r\n \"w\": 8,\r\n + \ \"x\": 16,\r\n \"y\": 1\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 42,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n 
\"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"jaeger_collector_batch_size\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"\",\r\n \"refId\": \"A\"\r\n }\r\n + \ ],\r\n \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Batch Size (spans)\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 7,\r\n \"w\": 8,\r\n + \ \"x\": 0,\r\n \"y\": 8\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 38,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n 
\"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"jaeger_collector_queue_length\",\r\n + \ \"format\": \"time_series\",\r\n \"hide\": false,\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"\",\r\n \"refId\": \"A\"\r\n + \ }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Queue Length (Spans)\",\r\n \"tooltip\": {\r\n \"shared\": true,\r\n + \ \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n },\r\n + \ \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 7,\r\n \"w\": 8,\r\n + \ \"x\": 8,\r\n \"y\": 8\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 40,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": 
true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"rate(jaeger_collector_spans_dropped_total[5m])\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"\",\r\n \"refId\": \"A\"\r\n }\r\n + \ ],\r\n \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Spans Dropped/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 7,\r\n \"w\": 8,\r\n + \ \"x\": 16,\r\n \"y\": 8\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 44,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n 
\"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"jaeger_collector_save_latency_sum + / jaeger_collector_save_latency_count\",\r\n \"format\": \"time_series\",\r\n + \ \"intervalFactor\": 1,\r\n \"legendFormat\": \"\",\r\n + \ \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Span save latency (sec)\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"collapsed\": false,\r\n \"datasource\": + null,\r\n \"gridPos\": {\r\n \"h\": 1,\r\n \"w\": 24,\r\n + \ \"x\": 0,\r\n \"y\": 15\r\n },\r\n \"id\": 30,\r\n + \ \"panels\": [],\r\n \"repeat\": null,\r\n \"title\": \"Jaeger + Agent\",\r\n \"type\": \"row\"\r\n },\r\n {\r\n \"aliasColors\": + {},\r\n \"bars\": false,\r\n \"dashLength\": 10,\r\n \"dashes\": + false,\r\n \"datasource\": \"Prometheus\",\r\n \"fieldConfig\": + {\r\n \"defaults\": {},\r\n \"overrides\": []\r\n },\r\n + \ \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n + \ \"h\": 6,\r\n \"w\": 8,\r\n \"x\": 0,\r\n \"y\": 
+ 16\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 8,\r\n + \ \"legend\": {\r\n \"avg\": false,\r\n \"current\": false,\r\n + \ \"max\": false,\r\n \"min\": false,\r\n \"show\": + true,\r\n \"total\": false,\r\n \"values\": false\r\n },\r\n + \ \"lines\": true,\r\n \"linewidth\": 1,\r\n \"links\": [],\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"paceLength\": 10,\r\n \"percentage\": false,\r\n + \ \"pluginVersion\": \"7.5.3\",\r\n \"pointradius\": 5,\r\n \"points\": + false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": [],\r\n + \ \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_thrift_udp_server_packets_processed_total[5m])) + by (protocol)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_thrift_udp_server_packets_processed/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n 
\"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 8,\r\n \"y\": 16\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 20,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"avg(jaeger_agent_thrift_udp_server_packet_size) + by (protocol)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"avg(jaeger_agent_thrift_udp_server_packet_size)\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": 
{},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 16,\r\n \"y\": 16\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 24,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"avg(jaeger_agent_thrift_udp_server_queue_size) + by (protocol)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_thrift_udp_server_queue_size\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + 
\"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 0,\r\n \"y\": 22\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 26,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_thrift_udp_server_read_errors_total[5m])) + by (protocol)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_thrift_udp_server_read_errors/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + 
false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 8,\r\n \"y\": 22\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 22,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_thrift_udp_server_packets_dropped_total[5m])) + by (protocol)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_thrift_udp_server_packets_dropped/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n 
\"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 16,\r\n \"y\": 22\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 28,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_thrift_udp_t_processor_handler_errors_total[5m])) + by (protocol)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_thrift_udp_t_processor_handler_errors/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": 
null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 0,\r\n \"y\": 28\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 6,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_reporter_batches_submitted_total[5m])) + by (format)\",\r\n \"format\": \"time_series\",\r\n \"hide\": + false,\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"\",\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_tchannel_reporter_batches_submitted/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + 
true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 8,\r\n \"y\": 28\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 16,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_reporter_spans_submitted_total[5m])) + by (format)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_tchannel_reporter_spans_submitted/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n 
\"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 16,\r\n \"y\": 28\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 12,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"avg(jaeger_agent_reporter_batch_size) + by (format)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"avg(jaeger_agent_tchannel_reporter_batch_size)\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n 
\"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 0,\r\n \"y\": 34\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 4,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_http_server_requests_total[5m])) + by (type)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"\",\r\n \"refId\": \"A\"\r\n + \ }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"jaeger_agent_http_server_requests/sec\",\r\n \"tooltip\": {\r\n \"shared\": + true,\r\n \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n 
\"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 8,\r\n \"y\": 34\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 14,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_reporter_batches_failures_total[5m])) + by (format)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_tchannel_reporter_batches_failures/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n 
\"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 16,\r\n \"y\": 34\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 18,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_reporter_spans_failures_total[5m])) + by (format)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"jaeger_agent_tchannel_reporter_spans_failures/sec\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": 
\"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 8,\r\n + \ \"x\": 0,\r\n \"y\": 40\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 2,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_agent_http_server_errors_total[5m])) + by (source, status)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"{{source}}.{{status}}\",\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + 
\"jaeger_agent_http_server_errors/sec\",\r\n \"tooltip\": {\r\n \"shared\": + true,\r\n \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 16,\r\n + \ \"x\": 8,\r\n \"y\": 40\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 10,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(jaeger_agent_collector_proxy_total) + by (endpoint,protocol,result)\",\r\n \"format\": \"time_series\",\r\n + \ \"intervalFactor\": 1,\r\n \"refId\": \"A\"\r\n }\r\n + \ ],\r\n \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": 
+ [],\r\n \"timeShift\": null,\r\n \"title\": \"total(jaeger_agent_collector_proxy)\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"collapsed\": false,\r\n + \ \"datasource\": null,\r\n \"gridPos\": {\r\n \"h\": 1,\r\n + \ \"w\": 24,\r\n \"x\": 0,\r\n \"y\": 46\r\n },\r\n + \ \"id\": 46,\r\n \"panels\": [],\r\n \"title\": \"Jaeger + Query\",\r\n \"type\": \"row\"\r\n },\r\n {\r\n \"aliasColors\": + {},\r\n \"bars\": false,\r\n \"dashLength\": 10,\r\n \"dashes\": + false,\r\n \"datasource\": \"Prometheus\",\r\n \"fieldConfig\": + {\r\n \"defaults\": {},\r\n \"overrides\": []\r\n },\r\n + \ \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n + \ \"h\": 7,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 47\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 48,\r\n + \ \"legend\": {\r\n \"avg\": false,\r\n \"current\": false,\r\n + \ \"hideEmpty\": true,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ 
\"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"(jaeger_rpc_request_latency_sum / + jaeger_rpc_request_latency_count)*1000\",\r\n \"format\": \"time_series\",\r\n + \ \"intervalFactor\": 1,\r\n \"legendFormat\": \"\",\r\n + \ \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Average request latency (milliseconds)\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 7,\r\n \"w\": 8,\r\n + \ \"x\": 0,\r\n \"y\": 54\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 50,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n 
\"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"hide\": false,\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"All requests (/min)\",\r\n \"refId\": + \"A\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{status_code=~\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"2xx Requests (/min)\",\r\n \"refId\": + \"B\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{status_code!=\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"Non-2xx Requests (/min)\",\r\n \"refId\": + \"C\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Requests/min (All requests)\",\r\n \"tooltip\": {\r\n \"shared\": + true,\r\n \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 
7,\r\n \"w\": 8,\r\n + \ \"x\": 8,\r\n \"y\": 54\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 52,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/traces\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"All requests (/min)\",\r\n \"refId\": + \"A\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/traces\\\",status_code=~\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"2xx Requests (/min)\",\r\n \"refId\": + \"B\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/traces\\\",status_code!=\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"Non-2xx Requests (/min)\",\r\n \"refId\": + \"C\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Requests/min (endpoint=\\\"/api/traces\\\")\",\r\n \"tooltip\": {\r\n + \ \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": 
[\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 7,\r\n \"w\": 8,\r\n + \ \"x\": 16,\r\n \"y\": 54\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 54,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/traces/-traceID-\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"All requests (/min)\",\r\n \"refId\": + \"A\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/traces/-traceID-\\\",status_code=~\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"2xx Requests (/min)\",\r\n \"refId\": + \"B\"\r\n },\r\n {\r\n \"expr\": 
\"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/traces/-traceID-\\\",status_code!=\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"Non-2xx Requests (/min)\",\r\n \"refId\": + \"C\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Requests/min (endpoint=\\\"/api/traces/-traceID-\\\")\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 8,\r\n \"w\": 12,\r\n + \ \"x\": 0,\r\n \"y\": 61\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 56,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n 
\"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/services/-service-/operations\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"All requests (/min)\",\r\n \"refId\": + \"A\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/services/-service-/operations\\\",status_code=~\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"2xx Requests (/min)\",\r\n \"refId\": + \"B\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/services/-service-/operations\\\",status_code!=\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"Non-2xx Requests (/min)\",\r\n \"refId\": + \"C\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Requests/min (endpoint=\\\"/api/services/-service-/operations\\\")\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n 
\"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 8,\r\n \"w\": 12,\r\n + \ \"x\": 12,\r\n \"y\": 61\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 58,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/services\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"All requests (/min)\",\r\n \"refId\": + \"A\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/services\\\",status_code=~\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"2xx Requests (/min)\",\r\n \"refId\": + \"B\"\r\n },\r\n {\r\n \"expr\": \"sum(rate(jaeger_rpc_http_requests_total{endpoint=~\\\"/api/services\\\",status_code!=\\\"2xx\\\"}[5m]))*60\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"Non-2xx Requests (/min)\",\r\n \"refId\": + \"C\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Requests/min (endpoint=\\\"/api/services\\\")\",\r\n \"tooltip\": {\r\n + \ \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + 
\"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ }\r\n ],\r\n \"refresh\": false,\r\n \"schemaVersion\": 27,\r\n + \ \"style\": \"dark\",\r\n \"tags\": [\r\n \"jaeger\"\r\n ],\r\n + \ \"templating\": {\r\n \"list\": []\r\n },\r\n \"time\": {\r\n \"from\": + \"now-15m\",\r\n \"to\": \"now\"\r\n },\r\n \"timepicker\": {\r\n \"refresh_intervals\": + [\r\n \"5s\",\r\n \"10s\",\r\n \"30s\",\r\n \"1m\",\r\n + \ \"5m\",\r\n \"15m\",\r\n \"30m\",\r\n \"1h\",\r\n + \ \"2h\",\r\n \"1d\"\r\n ],\r\n \"time_options\": [\r\n + \ \"5m\",\r\n \"15m\",\r\n \"1h\",\r\n \"6h\",\r\n + \ \"12h\",\r\n \"24h\",\r\n \"2d\",\r\n \"7d\",\r\n + \ \"30d\"\r\n ]\r\n },\r\n \"timezone\": \"\",\r\n \"title\": + \"Jaeger Dashboard\",\r\n \"uid\": \"zLOi95xmk\",\r\n \"version\": 2\r\n }" + diff --git a/src/terraform/modules/jaeger/crds/readme.md b/src/terraform/modules/jaeger/crds/readme.md new file mode 100644 index 00000000..b70c2878 --- /dev/null +++ b/src/terraform/modules/jaeger/crds/readme.md @@ -0,0 +1,9 @@ +# Jaeger dashboard + +> This resource was generated from the JSON file fetched from Grafana after importing it as dashboard `10001`, and then labelling and annotating the resulting resource.
+
+```powershell
+kubectl create configmap jaeger-dashboard --from-file=jaeger-dashboard.json=./src/skaffold/crds/jaeger/jaeger-grafana-dashboard.json -n infrastructure -o yaml > ./src/skaffold/crds/jaeger/jaeger-grafana-dashboard.yaml
+kubectl label --overwrite -f ./src/skaffold/crds/jaeger/jaeger-grafana-dashboard.yaml grafana_dashboard=1
+kubectl annotate --overwrite -f ./src/skaffold/crds/jaeger/jaeger-grafana-dashboard.yaml k8s-sidecar-target-directory=/tmp/dashboards/Infrastructure
+```
diff --git a/src/terraform/modules/jaeger/jaeger-values.yaml b/src/terraform/modules/jaeger/jaeger-values.yaml
new file mode 100644
index 00000000..d7f91b7f
--- /dev/null
+++ b/src/terraform/modules/jaeger/jaeger-values.yaml
@@ -0,0 +1,6 @@
+agent:
+  podAnnotations:
+    "consul.hashicorp.com/connect-inject": "true"
+query:
+  podAnnotations:
+    "consul.hashicorp.com/connect-inject": "true"
\ No newline at end of file
diff --git a/src/terraform/modules/jaeger/main.tf b/src/terraform/modules/jaeger/main.tf
new file mode 100644
index 00000000..428580e9
--- /dev/null
+++ b/src/terraform/modules/jaeger/main.tf
@@ -0,0 +1,56 @@
+# Installs the jaeger-operator Helm chart (pinned to 2.21.1) into var.namespace,
+# passing the pod annotations defined in jaeger-values.yaml as chart values.
+resource "helm_release" "jaeger-operator" {
+  name = "jaeger-operator"
+
+  repository = "https://jaegertracing.github.io/helm-charts"
+  chart      = "jaeger-operator"
+  namespace  = var.namespace
+  version    = "2.21.1"
+//  wait = true
+//  wait_for_jobs = true
+  values = [
+    templatefile("${path.module}/jaeger-values.yaml", {
+    })
+  ]
+}
+
+# Applies crds/all-in-one.yaml, rendered with the namespace and the Jaeger
+# domain name, after the operator release has been installed.
+resource "kubectl_manifest" "all-in-one" {
+  depends_on = [
+    helm_release.jaeger-operator
+  ]
+  yaml_body = templatefile("${path.module}/crds/all-in-one.yaml",
+    {
+      namespace   = var.namespace,
+      domain-name = var.jaeger-domain-name
+    })
+}
+
+# Applies crds/jaeger-admin-service.yaml, rendered with the namespace.
+resource "kubectl_manifest" "jaeger-admin-service" {
+  depends_on = [
+    helm_release.jaeger-operator
+  ]
+  yaml_body = templatefile("${path.module}/crds/jaeger-admin-service.yaml",
+    {
+      namespace = var.namespace,
+    })
+}
+
+# Applies the Grafana dashboard manifest from crds/jaeger-grafana-dashboard.yaml.
+resource "kubectl_manifest" "jaeger-grafana-dashboard" {
+  depends_on = [
+    helm_release.jaeger-operator
+  ]
+  yaml_body = templatefile("${path.module}/crds/jaeger-grafana-dashboard.yaml",
+    {
+      namespace = var.namespace,
+    })
+}
+
+# HTTPS URL built from var.jaeger-domain-name.
+output "jaeger-url" {
+  value = "https://${var.jaeger-domain-name}"
+}
\ No newline at end of file
diff --git a/src/terraform/modules/jaeger/providers.tf b/src/terraform/modules/jaeger/providers.tf
new file mode 100644
index 00000000..99459e5a
--- /dev/null
+++ b/src/terraform/modules/jaeger/providers.tf
@@ -0,0 +1,9 @@
+terraform {
+  required_version = ">= 0.13"
+  required_providers {
+    kubectl = {
+      source  = "gavinbunney/kubectl"
+      version = ">= 1.7.0"
+    }
+  }
+}
diff --git a/src/terraform/modules/jaeger/variables.tf b/src/terraform/modules/jaeger/variables.tf
new file mode 100644
index 00000000..eb3ca67a
--- /dev/null
+++ b/src/terraform/modules/jaeger/variables.tf
@@ -0,0 +1,7 @@
+variable "jaeger-domain-name" {
+  type = string
+}
+
+variable "namespace" {
+  type = string
+}
diff --git a/src/terraform/modules/loki/crds/loki-monitor-dashboard.json b/src/terraform/modules/loki/crds/loki-monitor-dashboard.json new file mode 100644 index 00000000..43774d6d --- /dev/null +++ b/src/terraform/modules/loki/crds/loki-monitor-dashboard.json @@ -0,0 +1,2385 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": false, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "description": "This dashboard can be used to detect issues on the Loki stack, when deployed in Kubernetes. Shows: some error metrics published by Promtail/Loki. Error and warning logs emitted by Promtail/Loki. 
Memory and CPU usage of Promtail/Loki compared against the Kubernetes memory/cpu limits and requests.", + "editable": true, + "gnetId": 14055, + "graphTooltip": 0, + "id": 31, + "links": [], + "panels": [ + { + "dashboardFilter": "", + "dashboardTags": [], + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "folderId": null, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 987, + "limit": "100", + "nameFilter": "", + "onlyAlertsOnDashboard": true, + "pluginVersion": "7.3.5", + "show": "current", + "sortOrder": 3, + "stateFilter": [], + "timeFrom": null, + "timeShift": null, + "title": "Summary of alerts status", + "type": "alertlist" + }, + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 435, + "panels": [], + "title": "Problems in the Loki Stack", + "type": "row" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Loki has emitted error/warning messages in the last 5m", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Total number of messages logged by Loki itself", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 9, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 10, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, 
+ "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "debug", + "color": "#C0D8FF" + }, + { + "alias": "info", + "color": "#5794F2" + }, + { + "alias": "warn", + "color": "#FF9830" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(log_messages_total{app=\"loki\",level=~\"error|warn\"}[1m])) by (level)\n", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{level}}", + "refId": "A" + }, + { + "expr": "log_messages_total", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Number of error/warning messages logged by Loki itself", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 15, + "x": 9, + "y": 7 + }, + "id": 511, + "options": { + "dedupStrategy": "none", + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "7.3.5", + "targets": [ + 
{ + "expr": "{app=\"loki\"} | logfmt | level=\"warn\" or level=\"error\"", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Error/warning messages logged by Loki itself", + "type": "logs" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Promtail has emitted error/warning messages in the last 5m", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Total number of messages logged by Promtail", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 9, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 106, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "debug", + "color": "#C0D8FF" + }, + { + "alias": "info", + "color": "#5794F2" + }, + { + "alias": "warn", + "color": "#FF9830" + }, + { + "alias": "error", + "color": "#C4162A" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(log_messages_total{app_kubernetes_io_name=\"promtail\",level=~\"error|warn\"}[1m])) by (level)\n", + "format": "time_series", + "hide": false, + "interval": 
"", + "intervalFactor": 1, + "legendFormat": "{{level}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error/warning messages logged by Promtail", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "Loki", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 15, + "x": 9, + "y": 12 + }, + "id": 586, + "options": { + "dedupStrategy": "none", + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "7.3.5", + "targets": [ + { + "expr": "{app=\"promtail\"} | logfmt | level=\"warn\" or level=\"error\"", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Error/warning messages logged by Promtail", + "type": "logs" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.01 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Loki distributor has failed to send batches to ingesters", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, 
+ "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 676, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(loki_distributor_ingester_append_failures_total[1m])*60", + "instant": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.01, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Loki (distributor) - failed batch appends sent to ingesters", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.01 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + 
"for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Promtail has dropped logs in the last 5m", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Number of log entries dropped because failed to be sent to the Loki ingester after all retries.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 677, + "interval": "1m", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(promtail_dropped_entries_total[1m])*60", + "instant": false, + "interval": "", + "legendFormat": "pod={{kubernetes_pod_name}}, instance={{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.01, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Promtail - dropped log entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + 
"collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 47, + "panels": [], + "title": "Logging activity", + "type": "row" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 5000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "The number of streams in Loki is above 5000", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "The total number of streams created per tenant.\nThis should not increase after startup.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 78, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "loki_ingester_memory_streams", + "interval": "", + "legendFormat": "Streams", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 5000, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Number of Streams in Loki", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": 
"individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "The total number of log entries received per tenant (not necessarily of lines, as an entry can have more than one line of text).", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 34 + }, + "hiddenSeries": false, + "id": 56, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(loki_distributor_lines_received_total[1m])", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Loki (distributor) - log entries received per second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": 
null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 1000000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "1m", + "frequency": "1m", + "handler": 1, + "name": "Quantity of data received by the Loki ingester is suspiciously high", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "The total number of uncompressed bytes received per tenant.\n", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 40 + }, + "hiddenSeries": false, + "id": 137, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_distributor_bytes_received_total[1m]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1000000, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes received in ingester per second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + 
"type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 18, + "panels": [], + "title": "Memory usage", + "type": "row" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 80 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "1m", + "frequency": "1m", + "handler": 1, + "name": "Loki memory usage is above 80% of the defined limit", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Percentage of actual usage over configured limit", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 47 + }, + "hiddenSeries": false, + "id": 247, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + 
"pluginVersion": "7.5.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 * max by(pod,container) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"loki.*\"})\n/\non (pod,container) group_left kube_pod_container_resource_limits_memory_bytes{pod=~\"loki.*\"}", + "hide": false, + "interval": "", + "legendFormat": "Usage (pod={{pod}}, container={{container}}, name={{name}})", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 80, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Loki memory usage (% of Kubernetes memory limit)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:241", + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:242", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 53 + }, + "hiddenSeries": false, + "id": 25, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + 
"sortDesc": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "Limit", + "color": "rgba(255, 255, 255, 0.56)", + "dashes": true + }, + { + "alias": "Request", + "color": "rgba(255, 255, 255, 0.78)" + }, + { + "alias": "Usage", + "fill": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"loki.*\"}", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Usage (pod={{pod}}, container={{container}})", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "min(kube_pod_container_resource_limits_memory_bytes{pod=~\"loki.*\"})", + "interval": "", + "legendFormat": "Limit", + "refId": "B" + }, + { + "expr": "min(kube_pod_container_resource_requests_memory_bytes{pod=~\"loki.*\"})", + "interval": "", + "legendFormat": "Request", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Loki memory usage vs Kubernetes limit and request", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:581", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:582", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + 
"conditions": [ + { + "evaluator": { + "params": [ + 80 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Promtail memory usage is above 80% of the defined limit", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "description": "Percentage of actual usage over configured limit", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 59 + }, + "hiddenSeries": false, + "id": 772, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100* max by(pod,container) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"})\n/\non (pod,container) (kube_pod_container_resource_limits_memory_bytes{pod=~\"promtail.*\"})", + "hide": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 80, + "visible": true + } + ], + "timeFrom": 
null, + "timeRegions": [], + "timeShift": null, + "title": "Promtail memory usage (% of Kubernetes memory limit)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:335", + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:336", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 65 + }, + "hiddenSeries": false, + "id": 1103, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Limit", + "color": "rgba(255, 255, 255, 0.56)", + "dashes": true + }, + { + "alias": "Request", + "color": "rgba(255, 255, 255, 0.78)" + }, + { + "alias": "Usage", + "fill": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}", 
+ "interval": "10s", + "intervalFactor": 1, + "legendFormat": "Usage (pod={{pod}}, container={{container}})", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "expr": "min(kube_pod_container_resource_limits_memory_bytes{pod=~\"promtail.*\"})", + "interval": "", + "legendFormat": "Limit", + "refId": "B" + }, + { + "expr": "min(kube_pod_container_resource_requests_memory_bytes{pod=~\"promtail.*\"})", + "interval": "", + "legendFormat": "Request", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Promtail memory usage vs Kubernetes limit and request", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:581", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:582", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": "Prometheus", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 71 + }, + "id": 8, + "panels": [], + "title": "CPU Usage", + "type": "row" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 80 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "1m", + "frequency": "1m", + "handler": 1, + "name": "Loki CPU usage is above 80% of the defined limit", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "percentage of configured limit", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 72 + }, + "height": "", + "hiddenSeries": false, + "id": 347, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100* rate(container_cpu_usage_seconds_total{container!=\"POD\",container!=\"\",pod=~\"loki.*\"}[1m])\n/\non (pod,container) kube_pod_container_resource_limits_cpu_cores{container=\"loki\"}", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 80, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Loki CPU usage (% of Kubernetes CPU limit)", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:413", + "format": "percent", + "label": "cores", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:414", + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 78 + }, + "height": "", + "hiddenSeries": false, + "id": 6, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "CPU Limit", + "color": "rgba(255, 255, 255, 0.46)", + "dashes": true + }, + { + "alias": "CPU Request", + "color": "rgba(255, 255, 255, 0.54)" + }, + { + "alias": "Usage", + "fill": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"loki.*\"}[1m])", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "CPU Usage (pod={{pod}}, container={{container}})", + "metric": "container_cpu", + "refId": "A", + "step": 10 + }, + { + "expr": "min(kube_pod_container_resource_limits_cpu_cores{pod=~\"loki.*\"})", + "interval": "", + "legendFormat": "CPU Limit", + "refId": "B" + }, + { + "expr": "min(kube_pod_container_resource_requests_cpu_cores{pod=~\"loki.*\"})", + "hide": false, + "interval": "", + "legendFormat": "CPU Request", + "refId": 
"C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Loki CPU usage vs Kubernetes limit and request", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:666", + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:667", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 80 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "keep_state", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Promtail CPU usage is above 80% of the defined limit", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "percentage of configured limit", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 84 + }, + "height": "", + "hiddenSeries": false, + "id": 910, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + 
"nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 * rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}[1m])\n/\non (pod,container) kube_pod_container_resource_limits_cpu_cores{pod=~\"promtail.*\"}", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 80, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Promtail CPU usage (% of Kubernetes limit)", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:231", + "format": "percent", + "label": "cores", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:232", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "description": "", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 90 + }, + "height": "", + "hiddenSeries": false, + "id": 1104, + "isNew": true, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + 
"max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": null, + "sortDesc": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "CPU Limit", + "color": "rgba(255, 255, 255, 0.46)", + "dashes": true + }, + { + "alias": "CPU Request", + "color": "rgba(255, 255, 255, 0.54)" + }, + { + "alias": "Usage", + "fill": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}[1m])", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "CPU Usage (pod={{pod}}, container={{container}})", + "metric": "container_cpu", + "refId": "A", + "step": 10 + }, + { + "expr": "min(kube_pod_container_resource_limits_cpu_cores{pod=~\"promtail.*\"})", + "interval": "", + "legendFormat": "CPU Limit", + "refId": "B" + }, + { + "expr": "min(kube_pod_container_resource_requests_cpu_cores{pod=~\"promtail.*\"})", + "hide": false, + "interval": "", + "legendFormat": "CPU Request", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Promtail CPU usage vs Kubernetes limit and request", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:666", + "format": "none", + "label": "cores", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:667", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + 
"yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "schemaVersion": 27, + "style": "dark", + "tags": [ + "tools", + "loki" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Loki stack monitoring (Promtail, Loki)", + "uid": "loki_stack_monitoring_quortex", + "version": 1 + } \ No newline at end of file diff --git a/src/terraform/modules/loki/crds/loki-monitor-dashboard.yaml b/src/terraform/modules/loki/crds/loki-monitor-dashboard.yaml new file mode 100644 index 00000000..e36e5035 --- /dev/null +++ b/src/terraform/modules/loki/crds/loki-monitor-dashboard.yaml @@ -0,0 +1,835 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-monitor-dashboard + namespace: ${namespace} + labels: + grafana_dashboard: "1" + annotations: + k8s-sidecar-target-directory: /tmp/dashboards/Infrastructure +data: + loki-monitor-dashboard.json: "{\r\n \"annotations\": {\r\n \"list\": [\r\n + \ {\r\n \"builtIn\": 1,\r\n \"datasource\": \"-- Grafana + --\",\r\n \"enable\": true,\r\n \"hide\": false,\r\n \"iconColor\": + \"rgba(0, 211, 255, 1)\",\r\n \"limit\": 100,\r\n \"name\": + \"Annotations & Alerts\",\r\n \"showIn\": 0,\r\n \"type\": \"dashboard\"\r\n + \ }\r\n ]\r\n },\r\n \"description\": \"This dashboard can be + used to detect issues on the Loki stack, when deployed in Kubernetes. Shows: some + error metrics published by Promtail/Loki. Error and warning logs emitted by Promtail/Loki. 
+ Memory and CPU usage of Promtail/Loki compared against the Kubernetes memory/cpu + limits and requests.\",\r\n \"editable\": true,\r\n \"gnetId\": 14055,\r\n + \ \"graphTooltip\": 0,\r\n \"id\": 31,\r\n \"links\": [],\r\n \"panels\": + [\r\n {\r\n \"dashboardFilter\": \"\",\r\n \"dashboardTags\": + [],\r\n \"datasource\": \"Prometheus\",\r\n \"fieldConfig\": {\r\n + \ \"defaults\": {},\r\n \"overrides\": []\r\n },\r\n \"folderId\": + null,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 24,\r\n + \ \"x\": 0,\r\n \"y\": 0\r\n },\r\n \"id\": 987,\r\n + \ \"limit\": \"100\",\r\n \"nameFilter\": \"\",\r\n \"onlyAlertsOnDashboard\": + true,\r\n \"pluginVersion\": \"7.3.5\",\r\n \"show\": \"current\",\r\n + \ \"sortOrder\": 3,\r\n \"stateFilter\": [],\r\n \"timeFrom\": + null,\r\n \"timeShift\": null,\r\n \"title\": \"Summary of alerts + status\",\r\n \"type\": \"alertlist\"\r\n },\r\n {\r\n \"collapsed\": + false,\r\n \"datasource\": \"Prometheus\",\r\n \"gridPos\": {\r\n + \ \"h\": 1,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 6\r\n },\r\n \"id\": 435,\r\n \"panels\": [],\r\n \"title\": + \"Problems in the Loki Stack\",\r\n \"type\": \"row\"\r\n },\r\n {\r\n + \ \"alert\": {\r\n \"alertRuleTags\": {},\r\n \"conditions\": + [\r\n {\r\n \"evaluator\": {\r\n \"params\": + [\r\n 0\r\n ],\r\n \"type\": \"gt\"\r\n + \ },\r\n \"operator\": {\r\n \"type\": + \"and\"\r\n },\r\n \"query\": {\r\n \"params\": + [\r\n \"A\",\r\n \"5m\",\r\n \"now\"\r\n + \ ]\r\n },\r\n \"reducer\": {\r\n \"params\": + [],\r\n \"type\": \"avg\"\r\n },\r\n \"type\": + \"query\"\r\n }\r\n ],\r\n \"executionErrorState\": + \"keep_state\",\r\n \"for\": \"5m\",\r\n \"frequency\": \"1m\",\r\n + \ \"handler\": 1,\r\n \"name\": \"Loki has emitted error/warning + messages in the last 5m\",\r\n \"noDataState\": \"no_data\",\r\n \"notifications\": + []\r\n },\r\n \"aliasColors\": {},\r\n \"bars\": true,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"description\": 
\"Total number of messages logged + by Loki itself\",\r\n \"fieldConfig\": {\r\n \"defaults\": {\r\n + \ \"links\": []\r\n },\r\n \"overrides\": []\r\n },\r\n + \ \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n + \ \"h\": 5,\r\n \"w\": 9,\r\n \"x\": 0,\r\n \"y\": + 7\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 10,\r\n + \ \"interval\": \"1m\",\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": false,\r\n \"linewidth\": 1,\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.5\",\r\n \"pointradius\": 2,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [\r\n {\r\n \"alias\": + \"debug\",\r\n \"color\": \"#C0D8FF\"\r\n },\r\n {\r\n + \ \"alias\": \"info\",\r\n \"color\": \"#5794F2\"\r\n },\r\n + \ {\r\n \"alias\": \"warn\",\r\n \"color\": \"#FF9830\"\r\n + \ },\r\n {\r\n \"alias\": \"error\",\r\n \"color\": + \"#C4162A\"\r\n }\r\n ],\r\n \"spaceLength\": 10,\r\n \"stack\": + true,\r\n \"steppedLine\": false,\r\n \"targets\": [\r\n {\r\n + \ \"expr\": \"sum(rate(log_messages_total{app=\\\"loki\\\",level=~\\\"error|warn\\\"}[1m])) + by (level)\\n\",\r\n \"format\": \"time_series\",\r\n \"hide\": + false,\r\n \"interval\": \"\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"{{level}}\",\r\n \"refId\": \"A\"\r\n + \ },\r\n {\r\n \"expr\": \"log_messages_total\",\r\n + \ \"format\": \"time_series\",\r\n \"hide\": true,\r\n \"interval\": + \"\",\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": \"\",\r\n + \ \"refId\": \"B\"\r\n }\r\n ],\r\n \"thresholds\": + [\r\n {\r\n \"colorMode\": \"critical\",\r\n \"fill\": + true,\r\n \"line\": true,\r\n \"op\": \"gt\",\r\n \"value\": + 0,\r\n \"visible\": true\r\n }\r\n ],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n 
\"timeShift\": null,\r\n \"title\": + \"Number of error/warning messages logged by Loki itself\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"none\",\r\n \"label\": + null,\r\n \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + \"0\",\r\n \"show\": true\r\n },\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"datasource\": \"Loki\",\r\n \"fieldConfig\": {\r\n \"defaults\": + {},\r\n \"overrides\": []\r\n },\r\n \"gridPos\": {\r\n + \ \"h\": 5,\r\n \"w\": 15,\r\n \"x\": 9,\r\n \"y\": + 7\r\n },\r\n \"id\": 511,\r\n \"options\": {\r\n \"dedupStrategy\": + \"none\",\r\n \"showLabels\": false,\r\n \"showTime\": false,\r\n + \ \"sortOrder\": \"Descending\",\r\n \"wrapLogMessage\": false\r\n + \ },\r\n \"pluginVersion\": \"7.3.5\",\r\n \"targets\": [\r\n + \ {\r\n \"expr\": \"{app=\\\"loki\\\"} | logfmt | level=\\\"warn\\\" + or level=\\\"error\\\"\",\r\n \"legendFormat\": \"\",\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"timeFrom\": null,\r\n \"timeShift\": + null,\r\n \"title\": \"Error/warning messages logged by Loki itself\",\r\n + \ \"type\": \"logs\"\r\n },\r\n {\r\n \"alert\": {\r\n + \ \"alertRuleTags\": {},\r\n \"conditions\": [\r\n {\r\n + \ \"evaluator\": {\r\n \"params\": [\r\n 0\r\n + \ ],\r\n \"type\": \"gt\"\r\n },\r\n + \ \"operator\": {\r\n \"type\": \"and\"\r\n },\r\n + \ \"query\": {\r\n \"params\": [\r\n \"A\",\r\n + \ \"5m\",\r\n \"now\"\r\n ]\r\n + \ },\r\n \"reducer\": {\r\n \"params\": + [],\r\n \"type\": \"avg\"\r\n },\r\n \"type\": + \"query\"\r\n }\r\n ],\r\n \"executionErrorState\": + 
\"keep_state\",\r\n \"for\": \"5m\",\r\n \"frequency\": \"1m\",\r\n + \ \"handler\": 1,\r\n \"name\": \"Promtail has emitted error/warning + messages in the last 5m\",\r\n \"noDataState\": \"no_data\",\r\n \"notifications\": + []\r\n },\r\n \"aliasColors\": {},\r\n \"bars\": true,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"description\": \"Total number of messages logged + by Promtail\",\r\n \"fieldConfig\": {\r\n \"defaults\": {\r\n + \ \"links\": []\r\n },\r\n \"overrides\": []\r\n },\r\n + \ \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n + \ \"h\": 5,\r\n \"w\": 9,\r\n \"x\": 0,\r\n \"y\": + 12\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 106,\r\n + \ \"interval\": \"1m\",\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": false,\r\n \"linewidth\": 1,\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.5\",\r\n \"pointradius\": 2,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [\r\n {\r\n \"alias\": + \"debug\",\r\n \"color\": \"#C0D8FF\"\r\n },\r\n {\r\n + \ \"alias\": \"info\",\r\n \"color\": \"#5794F2\"\r\n },\r\n + \ {\r\n \"alias\": \"warn\",\r\n \"color\": \"#FF9830\"\r\n + \ },\r\n {\r\n \"alias\": \"error\",\r\n \"color\": + \"#C4162A\"\r\n }\r\n ],\r\n \"spaceLength\": 10,\r\n \"stack\": + true,\r\n \"steppedLine\": false,\r\n \"targets\": [\r\n {\r\n + \ \"expr\": \"sum(rate(log_messages_total{app_kubernetes_io_name=\\\"promtail\\\",level=~\\\"error|warn\\\"}[1m])) + by (level)\\n\",\r\n \"format\": \"time_series\",\r\n \"hide\": + false,\r\n \"interval\": \"\",\r\n \"intervalFactor\": 1,\r\n + \ \"legendFormat\": \"{{level}}\",\r\n \"refId\": \"A\"\r\n + \ }\r\n ],\r\n \"thresholds\": [\r\n {\r\n \"colorMode\": + 
\"critical\",\r\n \"fill\": true,\r\n \"line\": true,\r\n + \ \"op\": \"gt\",\r\n \"value\": 0,\r\n \"visible\": + true\r\n }\r\n ],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Error/warning messages + logged by Promtail\",\r\n \"tooltip\": {\r\n \"shared\": true,\r\n + \ \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n },\r\n + \ \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"none\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"datasource\": \"Loki\",\r\n + \ \"fieldConfig\": {\r\n \"defaults\": {},\r\n \"overrides\": + []\r\n },\r\n \"gridPos\": {\r\n \"h\": 5,\r\n \"w\": + 15,\r\n \"x\": 9,\r\n \"y\": 12\r\n },\r\n \"id\": + 586,\r\n \"options\": {\r\n \"dedupStrategy\": \"none\",\r\n \"showLabels\": + false,\r\n \"showTime\": false,\r\n \"sortOrder\": \"Descending\",\r\n + \ \"wrapLogMessage\": false\r\n },\r\n \"pluginVersion\": + \"7.3.5\",\r\n \"targets\": [\r\n {\r\n \"expr\": \"{app=\\\"promtail\\\"} + | logfmt | level=\\\"warn\\\" or level=\\\"error\\\"\",\r\n \"legendFormat\": + \"\",\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"timeFrom\": + null,\r\n \"timeShift\": null,\r\n \"title\": \"Error/warning messages + logged by Promtail\",\r\n \"type\": \"logs\"\r\n },\r\n {\r\n + \ \"alert\": {\r\n \"alertRuleTags\": {},\r\n \"conditions\": + [\r\n {\r\n \"evaluator\": {\r\n \"params\": + [\r\n 0.01\r\n ],\r\n \"type\": + \"gt\"\r\n },\r\n \"operator\": {\r\n \"type\": + \"and\"\r\n },\r\n \"query\": {\r\n \"params\": + [\r\n \"A\",\r\n \"5m\",\r\n 
\"now\"\r\n + \ ]\r\n },\r\n \"reducer\": {\r\n \"params\": + [],\r\n \"type\": \"max\"\r\n },\r\n \"type\": + \"query\"\r\n }\r\n ],\r\n \"executionErrorState\": + \"keep_state\",\r\n \"for\": \"5m\",\r\n \"frequency\": \"1m\",\r\n + \ \"handler\": 1,\r\n \"name\": \"Loki distributor has failed + to send batches to ingesters\",\r\n \"noDataState\": \"no_data\",\r\n + \ \"notifications\": []\r\n },\r\n \"aliasColors\": {},\r\n + \ \"bars\": false,\r\n \"dashLength\": 10,\r\n \"dashes\": + false,\r\n \"datasource\": \"Prometheus\",\r\n \"fieldConfig\": + {\r\n \"defaults\": {},\r\n \"overrides\": []\r\n },\r\n + \ \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n + \ \"h\": 5,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 17\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 676,\r\n + \ \"interval\": \"1m\",\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.5\",\r\n \"pointradius\": 2,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"rate(loki_distributor_ingester_append_failures_total[1m])*60\",\r\n + \ \"instant\": false,\r\n \"interval\": \"\",\r\n \"legendFormat\": + \"\",\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [\r\n {\r\n \"colorMode\": \"critical\",\r\n \"fill\": + true,\r\n \"line\": true,\r\n \"op\": \"gt\",\r\n \"value\": + 0.01,\r\n \"visible\": true\r\n }\r\n ],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Loki (distributor) - failed batch appends sent to ingesters\",\r\n \"tooltip\": + {\r\n \"shared\": 
true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": \"0\",\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"alert\": {\r\n \"alertRuleTags\": {},\r\n + \ \"conditions\": [\r\n {\r\n \"evaluator\": {\r\n + \ \"params\": [\r\n 0.01\r\n ],\r\n + \ \"type\": \"gt\"\r\n },\r\n \"operator\": + {\r\n \"type\": \"and\"\r\n },\r\n \"query\": + {\r\n \"params\": [\r\n \"A\",\r\n \"5m\",\r\n + \ \"now\"\r\n ]\r\n },\r\n \"reducer\": + {\r\n \"params\": [],\r\n \"type\": \"max\"\r\n + \ },\r\n \"type\": \"query\"\r\n }\r\n ],\r\n + \ \"executionErrorState\": \"keep_state\",\r\n \"for\": \"5m\",\r\n + \ \"frequency\": \"1m\",\r\n \"handler\": 1,\r\n \"name\": + \"Promtail has dropped logs in the last 5m\",\r\n \"noDataState\": \"no_data\",\r\n + \ \"notifications\": []\r\n },\r\n \"aliasColors\": {},\r\n + \ \"bars\": false,\r\n \"dashLength\": 10,\r\n \"dashes\": + false,\r\n \"datasource\": \"Prometheus\",\r\n \"description\": + \"Number of log entries dropped because failed to be sent to the Loki ingester + after all retries.\",\r\n \"fieldConfig\": {\r\n \"defaults\": + {},\r\n \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 5,\r\n \"w\": 24,\r\n + \ \"x\": 0,\r\n \"y\": 22\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 677,\r\n \"interval\": \"1m\",\r\n \"legend\": + {\r\n \"avg\": false,\r\n \"current\": false,\r\n \"max\": + false,\r\n \"min\": false,\r\n \"show\": true,\r\n \"total\": + 
false,\r\n \"values\": false\r\n },\r\n \"lines\": true,\r\n + \ \"linewidth\": 1,\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"percentage\": + false,\r\n \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": 2,\r\n + \ \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": + [],\r\n \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"rate(promtail_dropped_entries_total[1m])*60\",\r\n + \ \"instant\": false,\r\n \"interval\": \"\",\r\n \"legendFormat\": + \"pod={{kubernetes_pod_name}}, instance={{instance}}\",\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"thresholds\": [\r\n {\r\n + \ \"colorMode\": \"critical\",\r\n \"fill\": true,\r\n \"line\": + true,\r\n \"op\": \"gt\",\r\n \"value\": 0.01,\r\n \"visible\": + true\r\n }\r\n ],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Promtail - dropped + log entries\",\r\n \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": + 0,\r\n \"value_type\": \"individual\"\r\n },\r\n \"type\": + \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": + \"time\",\r\n \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"collapsed\": false,\r\n + \ \"datasource\": \"Prometheus\",\r\n \"gridPos\": {\r\n \"h\": + 1,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": 27\r\n },\r\n + \ \"id\": 47,\r\n \"panels\": [],\r\n \"title\": \"Logging + activity\",\r\n \"type\": \"row\"\r\n },\r\n {\r\n \"alert\": + {\r\n \"alertRuleTags\": {},\r\n \"conditions\": [\r\n 
{\r\n + \ \"evaluator\": {\r\n \"params\": [\r\n 5000\r\n + \ ],\r\n \"type\": \"gt\"\r\n },\r\n + \ \"operator\": {\r\n \"type\": \"and\"\r\n },\r\n + \ \"query\": {\r\n \"params\": [\r\n \"A\",\r\n + \ \"1m\",\r\n \"now\"\r\n ]\r\n + \ },\r\n \"reducer\": {\r\n \"params\": + [],\r\n \"type\": \"avg\"\r\n },\r\n \"type\": + \"query\"\r\n }\r\n ],\r\n \"executionErrorState\": + \"keep_state\",\r\n \"for\": \"5m\",\r\n \"frequency\": \"1m\",\r\n + \ \"handler\": 1,\r\n \"name\": \"The number of streams in Loki + is above 5000\",\r\n \"noDataState\": \"no_data\",\r\n \"notifications\": + []\r\n },\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"description\": \"The total number of streams created + per tenant.\\nThis should not increase after startup.\",\r\n \"fieldConfig\": + {\r\n \"defaults\": {\r\n \"links\": []\r\n },\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 24,\r\n + \ \"x\": 0,\r\n \"y\": 28\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 78,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": true,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + true\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.5\",\r\n \"pointradius\": 2,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"loki_ingester_memory_streams\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"Streams\",\r\n + \ \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [\r\n {\r\n \"colorMode\": \"critical\",\r\n \"fill\": + true,\r\n \"line\": true,\r\n \"op\": 
\"gt\",\r\n \"value\": + 5000,\r\n \"visible\": true\r\n }\r\n ],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Number of Streams in Loki\",\r\n \"tooltip\": {\r\n \"shared\": + true,\r\n \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"description\": \"The total number of log entries + received per tenant (not necessarily of lines, as an entry can have more than + one line of text).\",\r\n \"fieldConfig\": {\r\n \"defaults\": + {\r\n \"links\": []\r\n },\r\n \"overrides\": []\r\n + \ },\r\n \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 34\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 56,\r\n + \ \"legend\": {\r\n \"avg\": false,\r\n \"current\": false,\r\n + \ \"max\": false,\r\n \"min\": false,\r\n \"show\": + true,\r\n \"total\": false,\r\n \"values\": false\r\n },\r\n + \ \"lines\": true,\r\n \"linewidth\": 1,\r\n \"nullPointMode\": + \"null\",\r\n \"options\": {\r\n \"alertThreshold\": true\r\n + \ },\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.5\",\r\n + \ \"pointradius\": 2,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": 
false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"rate(loki_distributor_lines_received_total[1m])\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"\",\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Loki (distributor) - log entries received per second\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": \"0\",\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"alert\": {\r\n \"alertRuleTags\": {},\r\n + \ \"conditions\": [\r\n {\r\n \"evaluator\": {\r\n + \ \"params\": [\r\n 1000000\r\n ],\r\n + \ \"type\": \"gt\"\r\n },\r\n \"operator\": + {\r\n \"type\": \"and\"\r\n },\r\n \"query\": + {\r\n \"params\": [\r\n \"A\",\r\n \"5m\",\r\n + \ \"now\"\r\n ]\r\n },\r\n \"reducer\": + {\r\n \"params\": [],\r\n \"type\": \"avg\"\r\n + \ },\r\n \"type\": \"query\"\r\n }\r\n ],\r\n + \ \"executionErrorState\": \"keep_state\",\r\n \"for\": \"1m\",\r\n + \ \"frequency\": \"1m\",\r\n \"handler\": 1,\r\n \"name\": + \"Quantity of data received by the Loki ingester is suspiciously high\",\r\n \"noDataState\": + \"no_data\",\r\n \"notifications\": []\r\n },\r\n \"aliasColors\": + {},\r\n \"bars\": false,\r\n \"dashLength\": 10,\r\n \"dashes\": + false,\r\n \"datasource\": \"Prometheus\",\r\n \"description\": + \"The total number of uncompressed bytes received per tenant.\\n\",\r\n \"fieldConfig\": + {\r\n 
\"defaults\": {\r\n \"links\": []\r\n },\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 24,\r\n + \ \"x\": 0,\r\n \"y\": 40\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 137,\r\n \"legend\": {\r\n \"avg\": + false,\r\n \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.5\",\r\n \"pointradius\": 2,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(loki_distributor_bytes_received_total[1m]))\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"\",\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"thresholds\": [\r\n {\r\n + \ \"colorMode\": \"critical\",\r\n \"fill\": true,\r\n \"line\": + true,\r\n \"op\": \"gt\",\r\n \"value\": 1000000,\r\n \"visible\": + true\r\n }\r\n ],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Bytes received in + ingester per second\",\r\n \"tooltip\": {\r\n \"shared\": true,\r\n + \ \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n },\r\n + \ \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"decbytes\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n 
},\r\n {\r\n \"collapsed\": false,\r\n + \ \"datasource\": \"Prometheus\",\r\n \"gridPos\": {\r\n \"h\": + 1,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": 46\r\n },\r\n + \ \"id\": 18,\r\n \"panels\": [],\r\n \"title\": \"Memory + usage\",\r\n \"type\": \"row\"\r\n },\r\n {\r\n \"alert\": + {\r\n \"alertRuleTags\": {},\r\n \"conditions\": [\r\n {\r\n + \ \"evaluator\": {\r\n \"params\": [\r\n 80\r\n + \ ],\r\n \"type\": \"gt\"\r\n },\r\n + \ \"operator\": {\r\n \"type\": \"and\"\r\n },\r\n + \ \"query\": {\r\n \"params\": [\r\n \"A\",\r\n + \ \"5m\",\r\n \"now\"\r\n ]\r\n + \ },\r\n \"reducer\": {\r\n \"params\": + [],\r\n \"type\": \"avg\"\r\n },\r\n \"type\": + \"query\"\r\n }\r\n ],\r\n \"executionErrorState\": + \"keep_state\",\r\n \"for\": \"1m\",\r\n \"frequency\": \"1m\",\r\n + \ \"handler\": 1,\r\n \"name\": \"Loki memory usage is above + 80% of the defined limit\",\r\n \"noDataState\": \"no_data\",\r\n \"notifications\": + []\r\n },\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"decimals\": 2,\r\n \"description\": \"Percentage + of actual usage over configured limit\",\r\n \"editable\": true,\r\n \"error\": + false,\r\n \"fieldConfig\": {\r\n \"defaults\": {\r\n \"links\": + []\r\n },\r\n \"overrides\": []\r\n },\r\n \"fill\": + 0,\r\n \"fillGradient\": 0,\r\n \"grid\": {},\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 47\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 247,\r\n + \ \"isNew\": true,\r\n \"legend\": {\r\n \"alignAsTable\": + false,\r\n \"avg\": false,\r\n \"current\": false,\r\n \"max\": + false,\r\n \"min\": false,\r\n \"rightSide\": false,\r\n \"show\": + true,\r\n \"sideWidth\": 200,\r\n \"sort\": \"current\",\r\n + \ \"sortDesc\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 2,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"connected\",\r\n \"options\": + {\r\n 
\"alertThreshold\": true\r\n },\r\n \"percentage\": + false,\r\n \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": 5,\r\n + \ \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": + [],\r\n \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"100 + * max by(pod,container) (container_memory_working_set_bytes{container!=\\\"\\\",container!=\\\"POD\\\",pod=~\\\"loki.*\\\"})\\n/\\non + (pod,container) group_left kube_pod_container_resource_limits_memory_bytes{pod=~\\\"loki.*\\\"}\",\r\n + \ \"hide\": false,\r\n \"interval\": \"\",\r\n \"legendFormat\": + \"Usage (pod={{pod}}, container={{container}}, name={{name}})\",\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"thresholds\": [\r\n {\r\n + \ \"colorMode\": \"critical\",\r\n \"fill\": true,\r\n \"line\": + true,\r\n \"op\": \"gt\",\r\n \"value\": 80,\r\n \"visible\": + true\r\n }\r\n ],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Loki memory usage + (% of Kubernetes memory limit)\",\r\n \"tooltip\": {\r\n \"msResolution\": + false,\r\n \"shared\": true,\r\n \"sort\": 2,\r\n \"value_type\": + \"cumulative\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"$$hashKey\": \"object:241\",\r\n + \ \"format\": \"percent\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": \"100\",\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"$$hashKey\": \"object:242\",\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"aliasColors\": {},\r\n \"bars\": false,\r\n \"dashLength\": + 10,\r\n \"dashes\": false,\r\n \"datasource\": \"Prometheus\",\r\n + \ \"decimals\": 
2,\r\n \"editable\": true,\r\n \"error\": + false,\r\n \"fieldConfig\": {\r\n \"defaults\": {\r\n \"links\": + []\r\n },\r\n \"overrides\": []\r\n },\r\n \"fill\": + 0,\r\n \"fillGradient\": 0,\r\n \"grid\": {},\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 53\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 25,\r\n + \ \"isNew\": true,\r\n \"legend\": {\r\n \"alignAsTable\": + false,\r\n \"avg\": false,\r\n \"current\": false,\r\n \"max\": + false,\r\n \"min\": false,\r\n \"rightSide\": false,\r\n \"show\": + true,\r\n \"sideWidth\": 200,\r\n \"sort\": \"current\",\r\n + \ \"sortDesc\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 2,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"connected\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"percentage\": + false,\r\n \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": 5,\r\n + \ \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"repeat\": + null,\r\n \"seriesOverrides\": [\r\n {\r\n \"alias\": + \"Limit\",\r\n \"color\": \"rgba(255, 255, 255, 0.56)\",\r\n \"dashes\": + true\r\n },\r\n {\r\n \"alias\": \"Request\",\r\n + \ \"color\": \"rgba(255, 255, 255, 0.78)\"\r\n },\r\n {\r\n + \ \"alias\": \"Usage\",\r\n \"fill\": 1\r\n }\r\n + \ ],\r\n \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"container_memory_working_set_bytes{container!=\\\"\\\",container!=\\\"POD\\\",pod=~\\\"loki.*\\\"}\",\r\n + \ \"interval\": \"10s\",\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"Usage (pod={{pod}}, container={{container}})\",\r\n \"metric\": \"container_memory_usage:sort_desc\",\r\n + \ \"refId\": \"A\",\r\n \"step\": 10\r\n },\r\n + \ {\r\n \"expr\": \"min(kube_pod_container_resource_limits_memory_bytes{pod=~\\\"loki.*\\\"})\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"Limit\",\r\n + \ \"refId\": \"B\"\r\n },\r\n {\r\n \"expr\": + 
\"min(kube_pod_container_resource_requests_memory_bytes{pod=~\\\"loki.*\\\"})\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"Request\",\r\n + \ \"refId\": \"C\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Loki memory usage vs Kubernetes limit and request\",\r\n + \ \"tooltip\": {\r\n \"msResolution\": false,\r\n \"shared\": + true,\r\n \"sort\": 2,\r\n \"value_type\": \"cumulative\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"$$hashKey\": \"object:581\",\r\n \"format\": \"bytes\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": \"0\",\r\n \"show\": true\r\n },\r\n + \ {\r\n \"$$hashKey\": \"object:582\",\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"alert\": {\r\n \"alertRuleTags\": {},\r\n \"conditions\": + [\r\n {\r\n \"evaluator\": {\r\n \"params\": + [\r\n 80\r\n ],\r\n \"type\": \"gt\"\r\n + \ },\r\n \"operator\": {\r\n \"type\": + \"and\"\r\n },\r\n \"query\": {\r\n \"params\": + [\r\n \"A\",\r\n \"5m\",\r\n \"now\"\r\n + \ ]\r\n },\r\n \"reducer\": {\r\n \"params\": + [],\r\n \"type\": \"avg\"\r\n },\r\n \"type\": + \"query\"\r\n }\r\n ],\r\n \"executionErrorState\": + \"keep_state\",\r\n \"for\": \"5m\",\r\n \"frequency\": \"1m\",\r\n + \ \"handler\": 1,\r\n \"name\": \"Promtail memory usage is above + 80% of the defined limit\",\r\n \"noDataState\": \"no_data\",\r\n \"notifications\": + []\r\n },\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"decimals\": 2,\r\n \"description\": \"Percentage 
+ of actual usage over configured limit\",\r\n \"editable\": true,\r\n \"error\": + false,\r\n \"fieldConfig\": {\r\n \"defaults\": {\r\n \"links\": + []\r\n },\r\n \"overrides\": []\r\n },\r\n \"fill\": + 0,\r\n \"fillGradient\": 0,\r\n \"grid\": {},\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 59\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 772,\r\n + \ \"isNew\": true,\r\n \"legend\": {\r\n \"alignAsTable\": + false,\r\n \"avg\": false,\r\n \"current\": false,\r\n \"max\": + false,\r\n \"min\": false,\r\n \"rightSide\": false,\r\n \"show\": + true,\r\n \"sideWidth\": 200,\r\n \"sort\": \"current\",\r\n + \ \"sortDesc\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 2,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"connected\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"percentage\": + false,\r\n \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": 5,\r\n + \ \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": + [],\r\n \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"100* + max by(pod,container) (container_memory_working_set_bytes{container!=\\\"\\\",container!=\\\"POD\\\",pod=~\\\"promtail.*\\\"})\\n/\\non + (pod,container) (kube_pod_container_resource_limits_memory_bytes{pod=~\\\"promtail.*\\\"})\",\r\n + \ \"hide\": false,\r\n \"interval\": \"\",\r\n \"legendFormat\": + \"\",\r\n \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [\r\n {\r\n \"colorMode\": \"critical\",\r\n \"fill\": + true,\r\n \"line\": true,\r\n \"op\": \"gt\",\r\n \"value\": + 80,\r\n \"visible\": true\r\n }\r\n ],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Promtail memory usage (% of Kubernetes memory limit)\",\r\n \"tooltip\": + {\r\n \"msResolution\": false,\r\n \"shared\": true,\r\n \"sort\": + 2,\r\n \"value_type\": \"cumulative\"\r\n },\r\n 
\"type\": + \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": + \"time\",\r\n \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"$$hashKey\": + \"object:335\",\r\n \"format\": \"percent\",\r\n \"label\": + null,\r\n \"logBase\": 1,\r\n \"max\": \"100\",\r\n \"min\": + \"0\",\r\n \"show\": true\r\n },\r\n {\r\n \"$$hashKey\": + \"object:336\",\r\n \"format\": \"short\",\r\n \"label\": + null,\r\n \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": false\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"decimals\": 2,\r\n \"editable\": true,\r\n + \ \"error\": false,\r\n \"fieldConfig\": {\r\n \"defaults\": + {\r\n \"links\": []\r\n },\r\n \"overrides\": []\r\n + \ },\r\n \"fill\": 0,\r\n \"fillGradient\": 0,\r\n \"grid\": + {},\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 24,\r\n + \ \"x\": 0,\r\n \"y\": 65\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 1103,\r\n \"isNew\": true,\r\n \"legend\": + {\r\n \"alignAsTable\": false,\r\n \"avg\": false,\r\n \"current\": + false,\r\n \"max\": false,\r\n \"min\": false,\r\n \"rightSide\": + false,\r\n \"show\": true,\r\n \"sideWidth\": 200,\r\n \"sort\": + \"current\",\r\n \"sortDesc\": true,\r\n \"total\": false,\r\n + \ \"values\": false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": + 2,\r\n \"links\": [],\r\n \"nullPointMode\": \"connected\",\r\n + \ \"options\": {\r\n \"alertThreshold\": true\r\n },\r\n + \ \"percentage\": false,\r\n \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": + 5,\r\n \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": + [\r\n {\r\n \"alias\": \"Limit\",\r\n \"color\": + \"rgba(255, 255, 255, 0.56)\",\r\n \"dashes\": true\r\n },\r\n + \ {\r\n \"alias\": \"Request\",\r\n \"color\": \"rgba(255, + 255, 255, 
0.78)\"\r\n },\r\n {\r\n \"alias\": \"Usage\",\r\n + \ \"fill\": 1\r\n }\r\n ],\r\n \"spaceLength\": + 10,\r\n \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"container_memory_working_set_bytes{container!=\\\"\\\",container!=\\\"POD\\\",pod=~\\\"promtail.*\\\"}\",\r\n + \ \"interval\": \"10s\",\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"Usage (pod={{pod}}, container={{container}})\",\r\n \"metric\": \"container_memory_usage:sort_desc\",\r\n + \ \"refId\": \"A\",\r\n \"step\": 10\r\n },\r\n + \ {\r\n \"expr\": \"min(kube_pod_container_resource_limits_memory_bytes{pod=~\\\"promtail.*\\\"})\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"Limit\",\r\n + \ \"refId\": \"B\"\r\n },\r\n {\r\n \"expr\": + \"min(kube_pod_container_resource_requests_memory_bytes{pod=~\\\"promtail.*\\\"})\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"Request\",\r\n + \ \"refId\": \"C\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Promtail memory usage vs Kubernetes limit and request\",\r\n + \ \"tooltip\": {\r\n \"msResolution\": false,\r\n \"shared\": + true,\r\n \"sort\": 2,\r\n \"value_type\": \"cumulative\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"$$hashKey\": \"object:581\",\r\n \"format\": \"bytes\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": \"0\",\r\n \"show\": true\r\n },\r\n + \ {\r\n \"$$hashKey\": \"object:582\",\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"collapsed\": false,\r\n \"datasource\": \"Prometheus\",\r\n \"gridPos\": + {\r\n \"h\": 
1,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 71\r\n },\r\n \"id\": 8,\r\n \"panels\": [],\r\n \"title\": + \"CPU Usage\",\r\n \"type\": \"row\"\r\n },\r\n {\r\n \"alert\": + {\r\n \"alertRuleTags\": {},\r\n \"conditions\": [\r\n {\r\n + \ \"evaluator\": {\r\n \"params\": [\r\n 80\r\n + \ ],\r\n \"type\": \"gt\"\r\n },\r\n + \ \"operator\": {\r\n \"type\": \"and\"\r\n },\r\n + \ \"query\": {\r\n \"params\": [\r\n \"A\",\r\n + \ \"5m\",\r\n \"now\"\r\n ]\r\n + \ },\r\n \"reducer\": {\r\n \"params\": + [],\r\n \"type\": \"avg\"\r\n },\r\n \"type\": + \"query\"\r\n }\r\n ],\r\n \"executionErrorState\": + \"keep_state\",\r\n \"for\": \"1m\",\r\n \"frequency\": \"1m\",\r\n + \ \"handler\": 1,\r\n \"name\": \"Loki CPU usage is above 80% + of the defined limit\",\r\n \"noDataState\": \"no_data\",\r\n \"notifications\": + []\r\n },\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"decimals\": 3,\r\n \"description\": \"percentage + of configured limit\",\r\n \"editable\": true,\r\n \"error\": false,\r\n + \ \"fieldConfig\": {\r\n \"defaults\": {\r\n \"links\": + []\r\n },\r\n \"overrides\": []\r\n },\r\n \"fill\": + 0,\r\n \"fillGradient\": 0,\r\n \"grid\": {},\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 72\r\n },\r\n \"height\": \"\",\r\n \"hiddenSeries\": false,\r\n + \ \"id\": 347,\r\n \"isNew\": true,\r\n \"legend\": {\r\n + \ \"alignAsTable\": false,\r\n \"avg\": false,\r\n \"current\": + false,\r\n \"max\": false,\r\n \"min\": false,\r\n \"rightSide\": + false,\r\n \"show\": true,\r\n \"sort\": null,\r\n \"sortDesc\": + null,\r\n \"total\": false,\r\n \"values\": false\r\n },\r\n + \ \"lines\": true,\r\n \"linewidth\": 2,\r\n \"links\": [],\r\n + \ \"nullPointMode\": \"connected\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.5\",\r\n \"pointradius\": 5,\r\n \"points\": 
false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"100* rate(container_cpu_usage_seconds_total{container!=\\\"POD\\\",container!=\\\"\\\",pod=~\\\"loki.*\\\"}[1m])\\n/\\non + (pod,container) kube_pod_container_resource_limits_cpu_cores{container=\\\"loki\\\"}\",\r\n + \ \"interval\": \"10s\",\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"\",\r\n \"metric\": \"container_cpu\",\r\n \"refId\": + \"A\",\r\n \"step\": 10\r\n }\r\n ],\r\n \"thresholds\": + [\r\n {\r\n \"colorMode\": \"critical\",\r\n \"fill\": + true,\r\n \"line\": true,\r\n \"op\": \"gt\",\r\n \"value\": + 80,\r\n \"visible\": true\r\n }\r\n ],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Loki CPU usage (% of Kubernetes CPU limit)\",\r\n \"tooltip\": {\r\n + \ \"msResolution\": true,\r\n \"shared\": true,\r\n \"sort\": + 2,\r\n \"value_type\": \"cumulative\"\r\n },\r\n \"type\": + \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": + \"time\",\r\n \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"$$hashKey\": + \"object:413\",\r\n \"format\": \"percent\",\r\n \"label\": + \"cores\",\r\n \"logBase\": 1,\r\n \"max\": \"100\",\r\n + \ \"min\": \"0\",\r\n \"show\": true\r\n },\r\n + \ {\r\n \"$$hashKey\": \"object:414\",\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"aliasColors\": {},\r\n \"bars\": false,\r\n \"dashLength\": + 10,\r\n \"dashes\": false,\r\n \"datasource\": \"Prometheus\",\r\n + \ \"decimals\": 3,\r\n \"description\": \"\",\r\n \"editable\": + true,\r\n \"error\": false,\r\n \"fieldConfig\": {\r\n \"defaults\": + {\r\n \"links\": []\r\n },\r\n 
\"overrides\": []\r\n + \ },\r\n \"fill\": 0,\r\n \"fillGradient\": 0,\r\n \"grid\": + {},\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 24,\r\n + \ \"x\": 0,\r\n \"y\": 78\r\n },\r\n \"height\": + \"\",\r\n \"hiddenSeries\": false,\r\n \"id\": 6,\r\n \"isNew\": + true,\r\n \"legend\": {\r\n \"alignAsTable\": false,\r\n \"avg\": + false,\r\n \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"rightSide\": false,\r\n \"show\": true,\r\n \"sort\": + null,\r\n \"sortDesc\": null,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 2,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"connected\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"percentage\": + false,\r\n \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": 5,\r\n + \ \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"repeat\": + null,\r\n \"seriesOverrides\": [\r\n {\r\n \"alias\": + \"CPU Limit\",\r\n \"color\": \"rgba(255, 255, 255, 0.46)\",\r\n \"dashes\": + true\r\n },\r\n {\r\n \"alias\": \"CPU Request\",\r\n + \ \"color\": \"rgba(255, 255, 255, 0.54)\"\r\n },\r\n {\r\n + \ \"alias\": \"Usage\",\r\n \"fill\": 1\r\n }\r\n + \ ],\r\n \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"rate(container_cpu_usage_seconds_total{container!=\\\"\\\",container!=\\\"POD\\\",pod=~\\\"loki.*\\\"}[1m])\",\r\n + \ \"interval\": \"10s\",\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"CPU Usage (pod={{pod}}, container={{container}})\",\r\n \"metric\": + \"container_cpu\",\r\n \"refId\": \"A\",\r\n \"step\": 10\r\n + \ },\r\n {\r\n \"expr\": \"min(kube_pod_container_resource_limits_cpu_cores{pod=~\\\"loki.*\\\"})\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"CPU Limit\",\r\n + \ \"refId\": \"B\"\r\n },\r\n {\r\n \"expr\": + \"min(kube_pod_container_resource_requests_cpu_cores{pod=~\\\"loki.*\\\"})\",\r\n + \ \"hide\": false,\r\n \"interval\": \"\",\r\n \"legendFormat\": + 
\"CPU Request\",\r\n \"refId\": \"C\"\r\n }\r\n ],\r\n + \ \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Loki CPU usage vs + Kubernetes limit and request\",\r\n \"tooltip\": {\r\n \"msResolution\": + true,\r\n \"shared\": true,\r\n \"sort\": 2,\r\n \"value_type\": + \"cumulative\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"$$hashKey\": \"object:666\",\r\n + \ \"format\": \"none\",\r\n \"label\": \"cores\",\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"$$hashKey\": \"object:667\",\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"alert\": {\r\n \"alertRuleTags\": {},\r\n \"conditions\": + [\r\n {\r\n \"evaluator\": {\r\n \"params\": + [\r\n 80\r\n ],\r\n \"type\": \"gt\"\r\n + \ },\r\n \"operator\": {\r\n \"type\": + \"and\"\r\n },\r\n \"query\": {\r\n \"params\": + [\r\n \"A\",\r\n \"5m\",\r\n \"now\"\r\n + \ ]\r\n },\r\n \"reducer\": {\r\n \"params\": + [],\r\n \"type\": \"avg\"\r\n },\r\n \"type\": + \"query\"\r\n }\r\n ],\r\n \"executionErrorState\": + \"keep_state\",\r\n \"for\": \"5m\",\r\n \"frequency\": \"1m\",\r\n + \ \"handler\": 1,\r\n \"name\": \"Promtail CPU usage is above + 80% of the defined limit\",\r\n \"noDataState\": \"no_data\",\r\n \"notifications\": + []\r\n },\r\n \"aliasColors\": {},\r\n \"bars\": false,\r\n + \ \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"decimals\": 3,\r\n \"description\": \"percentage + of configured limit\",\r\n \"editable\": true,\r\n \"error\": false,\r\n + \ \"fieldConfig\": {\r\n \"defaults\": {\r\n \"links\": + 
[]\r\n },\r\n \"overrides\": []\r\n },\r\n \"fill\": + 0,\r\n \"fillGradient\": 0,\r\n \"grid\": {},\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 84\r\n },\r\n \"height\": \"\",\r\n \"hiddenSeries\": false,\r\n + \ \"id\": 910,\r\n \"isNew\": true,\r\n \"legend\": {\r\n + \ \"alignAsTable\": false,\r\n \"avg\": false,\r\n \"current\": + false,\r\n \"max\": false,\r\n \"min\": false,\r\n \"rightSide\": + false,\r\n \"show\": true,\r\n \"sort\": null,\r\n \"sortDesc\": + null,\r\n \"total\": false,\r\n \"values\": false\r\n },\r\n + \ \"lines\": true,\r\n \"linewidth\": 2,\r\n \"links\": [],\r\n + \ \"nullPointMode\": \"connected\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.5\",\r\n \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"100 * rate(container_cpu_usage_seconds_total{container!=\\\"\\\",container!=\\\"POD\\\",pod=~\\\"promtail.*\\\"}[1m])\\n/\\non + (pod,container) kube_pod_container_resource_limits_cpu_cores{pod=~\\\"promtail.*\\\"}\",\r\n + \ \"interval\": \"10s\",\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"\",\r\n \"metric\": \"container_cpu\",\r\n \"refId\": + \"A\",\r\n \"step\": 10\r\n }\r\n ],\r\n \"thresholds\": + [\r\n {\r\n \"colorMode\": \"critical\",\r\n \"fill\": + true,\r\n \"line\": true,\r\n \"op\": \"gt\",\r\n \"value\": + 80,\r\n \"visible\": true\r\n }\r\n ],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Promtail CPU usage (% of Kubernetes limit)\",\r\n \"tooltip\": {\r\n + \ \"msResolution\": true,\r\n \"shared\": true,\r\n \"sort\": + 2,\r\n \"value_type\": \"cumulative\"\r\n },\r\n \"type\": + \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": + \"time\",\r\n \"name\": null,\r\n \"show\": 
true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"$$hashKey\": + \"object:231\",\r\n \"format\": \"percent\",\r\n \"label\": + \"cores\",\r\n \"logBase\": 1,\r\n \"max\": \"100\",\r\n + \ \"min\": \"0\",\r\n \"show\": true\r\n },\r\n + \ {\r\n \"$$hashKey\": \"object:232\",\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"aliasColors\": {},\r\n \"bars\": false,\r\n \"dashLength\": + 10,\r\n \"dashes\": false,\r\n \"datasource\": \"Prometheus\",\r\n + \ \"decimals\": 3,\r\n \"description\": \"\",\r\n \"editable\": + true,\r\n \"error\": false,\r\n \"fieldConfig\": {\r\n \"defaults\": + {\r\n \"links\": []\r\n },\r\n \"overrides\": []\r\n + \ },\r\n \"fill\": 0,\r\n \"fillGradient\": 0,\r\n \"grid\": + {},\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 24,\r\n + \ \"x\": 0,\r\n \"y\": 90\r\n },\r\n \"height\": + \"\",\r\n \"hiddenSeries\": false,\r\n \"id\": 1104,\r\n \"isNew\": + true,\r\n \"legend\": {\r\n \"alignAsTable\": false,\r\n \"avg\": + false,\r\n \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"rightSide\": false,\r\n \"show\": true,\r\n \"sort\": + null,\r\n \"sortDesc\": null,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 2,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"connected\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"percentage\": + false,\r\n \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": 5,\r\n + \ \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": + [\r\n {\r\n \"alias\": \"CPU Limit\",\r\n \"color\": + \"rgba(255, 255, 255, 0.46)\",\r\n \"dashes\": true\r\n },\r\n + \ {\r\n \"alias\": \"CPU Request\",\r\n \"color\": + \"rgba(255, 255, 255, 0.54)\"\r\n },\r\n {\r\n \"alias\": + \"Usage\",\r\n \"fill\": 1\r\n }\r\n ],\r\n 
\"spaceLength\": + 10,\r\n \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"rate(container_cpu_usage_seconds_total{container!=\\\"\\\",container!=\\\"POD\\\",pod=~\\\"promtail.*\\\"}[1m])\",\r\n + \ \"interval\": \"10s\",\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"CPU Usage (pod={{pod}}, container={{container}})\",\r\n \"metric\": + \"container_cpu\",\r\n \"refId\": \"A\",\r\n \"step\": 10\r\n + \ },\r\n {\r\n \"expr\": \"min(kube_pod_container_resource_limits_cpu_cores{pod=~\\\"promtail.*\\\"})\",\r\n + \ \"interval\": \"\",\r\n \"legendFormat\": \"CPU Limit\",\r\n + \ \"refId\": \"B\"\r\n },\r\n {\r\n \"expr\": + \"min(kube_pod_container_resource_requests_cpu_cores{pod=~\\\"promtail.*\\\"})\",\r\n + \ \"hide\": false,\r\n \"interval\": \"\",\r\n \"legendFormat\": + \"CPU Request\",\r\n \"refId\": \"C\"\r\n }\r\n ],\r\n + \ \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Promtail CPU usage + vs Kubernetes limit and request\",\r\n \"tooltip\": {\r\n \"msResolution\": + true,\r\n \"shared\": true,\r\n \"sort\": 2,\r\n \"value_type\": + \"cumulative\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"$$hashKey\": \"object:666\",\r\n + \ \"format\": \"none\",\r\n \"label\": \"cores\",\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"$$hashKey\": \"object:667\",\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n }\r\n ],\r\n + \ \"refresh\": \"1m\",\r\n \"schemaVersion\": 27,\r\n \"style\": \"dark\",\r\n + \ \"tags\": [\r\n \"tools\",\r\n \"loki\"\r\n ],\r\n \"templating\": + {\r\n 
\"list\": []\r\n },\r\n \"time\": {\r\n \"from\": \"now-24h\",\r\n + \ \"to\": \"now\"\r\n },\r\n \"timepicker\": {\r\n \"refresh_intervals\": + [\r\n \"10s\",\r\n \"30s\",\r\n \"1m\",\r\n \"5m\",\r\n + \ \"15m\",\r\n \"30m\",\r\n \"1h\",\r\n \"2h\",\r\n + \ \"1d\"\r\n ],\r\n \"time_options\": [\r\n \"5m\",\r\n + \ \"15m\",\r\n \"1h\",\r\n \"6h\",\r\n \"12h\",\r\n + \ \"24h\",\r\n \"2d\",\r\n \"7d\",\r\n \"30d\"\r\n + \ ]\r\n },\r\n \"timezone\": \"\",\r\n \"title\": \"Loki stack monitoring + (Promtail, Loki)\",\r\n \"uid\": \"loki_stack_monitoring_quortex\",\r\n \"version\": + 1\r\n }" diff --git a/src/terraform/modules/loki/crds/loki-promtail-dashboard.json b/src/terraform/modules/loki/crds/loki-promtail-dashboard.json new file mode 100644 index 00000000..4ad235ce --- /dev/null +++ b/src/terraform/modules/loki/crds/loki-promtail-dashboard.json @@ -0,0 +1,409 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "an example for loki and promtail.", + "editable": true, + "gnetId": 10004, + "graphTooltip": 1, + "id": 30, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (rate(loki_distributor_bytes_received_total[5m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Bytes Receiced", + "refId": "B" + }, + { + "expr": "sum (rate(loki_distributor_lines_received_total[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Lines Received", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Loki", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "lag", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + 
"expr": "sum (rate(promtail_read_bytes_total[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "read", + "refId": "B" + }, + { + "expr": "sum (rate(promtail_sent_bytes_total[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "sent", + "refId": "C" + }, + { + "expr": "sum (rate(promtail_encoded_bytes_total[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "encoded", + "refId": "D" + }, + { + "expr": "sum(promtail_file_bytes_total - promtail_read_bytes_total)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "lag", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Promtail", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + 
"pluginVersion": "7.5.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "promtail_file_bytes_total - promtail_read_bytes_total > 100000", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{path}}", + "refId": "A" + }, + { + "expr": "promtail_file_bytes_total - promtail_read_bytes_total < -100000", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{path}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Lag", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "5s", + "schemaVersion": 27, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Loki-Promtail", + "uid": "htcadXCiz", + "version": 1 + } \ No newline at end of file diff --git a/src/terraform/modules/loki/crds/loki-promtail-dashboard.yaml b/src/terraform/modules/loki/crds/loki-promtail-dashboard.yaml new file mode 100644 index 00000000..59132b8c --- /dev/null +++ b/src/terraform/modules/loki/crds/loki-promtail-dashboard.yaml @@ -0,0 +1,145 @@ 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-promtail-dashboard + namespace: ${namespace} + labels: + grafana_dashboard: "1" + annotations: + k8s-sidecar-target-directory: /tmp/dashboards/Infrastructure +data: + loki-promtail-dashboard.json: "{\r\n \"annotations\": {\r\n \"list\": [\r\n + \ {\r\n \"builtIn\": 1,\r\n \"datasource\": \"-- Grafana + --\",\r\n \"enable\": true,\r\n \"hide\": true,\r\n \"iconColor\": + \"rgba(0, 211, 255, 1)\",\r\n \"name\": \"Annotations & Alerts\",\r\n + \ \"type\": \"dashboard\"\r\n }\r\n ]\r\n },\r\n \"description\": + \"an example for loki and promtail.\",\r\n \"editable\": true,\r\n \"gnetId\": + 10004,\r\n \"graphTooltip\": 1,\r\n \"id\": 30,\r\n \"links\": [],\r\n + \ \"panels\": [\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + null,\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n \"overrides\": + []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": + {\r\n \"h\": 9,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 0\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 2,\r\n + \ \"legend\": {\r\n \"alignAsTable\": true,\r\n \"avg\": + false,\r\n \"current\": true,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"rightSide\": true,\r\n \"show\": true,\r\n \"total\": + false,\r\n \"values\": true\r\n },\r\n \"lines\": true,\r\n + \ \"linewidth\": 1,\r\n \"links\": [],\r\n \"nullPointMode\": + \"null\",\r\n \"options\": {\r\n \"alertThreshold\": true\r\n + \ },\r\n \"paceLength\": 10,\r\n \"percentage\": false,\r\n + \ \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": 2,\r\n \"points\": + false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": [],\r\n + \ \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"sum + (rate(loki_distributor_bytes_received_total[5m]))\",\r\n \"format\": + \"time_series\",\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"Bytes Received\",\r\n 
\"refId\": \"B\"\r\n },\r\n {\r\n + \ \"expr\": \"sum (rate(loki_distributor_lines_received_total[5m]))\",\r\n + \ \"format\": \"time_series\",\r\n \"interval\": \"\",\r\n + \ \"intervalFactor\": 1,\r\n \"legendFormat\": \"Lines Received\",\r\n + \ \"refId\": \"C\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Loki\",\r\n \"tooltip\": {\r\n \"shared\": + true,\r\n \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + null,\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n \"overrides\": + []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": + {\r\n \"h\": 9,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 9\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 3,\r\n + \ \"legend\": {\r\n \"alignAsTable\": true,\r\n \"avg\": + false,\r\n \"current\": true,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"rightSide\": true,\r\n \"show\": true,\r\n \"sort\": + \"current\",\r\n \"sortDesc\": true,\r\n \"total\": false,\r\n + \ \"values\": true\r\n },\r\n \"lines\": true,\r\n \"linewidth\": + 1,\r\n \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"paceLength\": + 10,\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.5\",\r\n + \ \"pointradius\": 
2,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [\r\n {\r\n \"alias\": + \"lag\",\r\n \"yaxis\": 2\r\n }\r\n ],\r\n \"spaceLength\": + 10,\r\n \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum (rate(promtail_read_bytes_total[5m]))\",\r\n + \ \"format\": \"time_series\",\r\n \"hide\": false,\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"read\",\r\n \"refId\": \"B\"\r\n + \ },\r\n {\r\n \"expr\": \"sum (rate(promtail_sent_bytes_total[5m]))\",\r\n + \ \"format\": \"time_series\",\r\n \"hide\": false,\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"sent\",\r\n \"refId\": \"C\"\r\n + \ },\r\n {\r\n \"expr\": \"sum (rate(promtail_encoded_bytes_total[5m]))\",\r\n + \ \"format\": \"time_series\",\r\n \"hide\": false,\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"encoded\",\r\n \"refId\": \"D\"\r\n + \ },\r\n {\r\n \"expr\": \"sum(promtail_file_bytes_total + - promtail_read_bytes_total)\",\r\n \"format\": \"time_series\",\r\n + \ \"intervalFactor\": 1,\r\n \"legendFormat\": \"lag\",\r\n + \ \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Promtail\",\r\n \"tooltip\": {\r\n \"shared\": + true,\r\n \"sort\": 2,\r\n \"value_type\": \"individual\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": \"0\",\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n 
\"dashes\": false,\r\n \"datasource\": + null,\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n \"overrides\": + []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": + {\r\n \"h\": 8,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 18\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 5,\r\n + \ \"legend\": {\r\n \"alignAsTable\": true,\r\n \"avg\": + false,\r\n \"current\": true,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"rightSide\": true,\r\n \"show\": true,\r\n \"sideWidth\": + null,\r\n \"sort\": \"current\",\r\n \"sortDesc\": true,\r\n + \ \"total\": false,\r\n \"values\": true\r\n },\r\n \"lines\": + true,\r\n \"linewidth\": 1,\r\n \"links\": [],\r\n \"nullPointMode\": + \"null\",\r\n \"options\": {\r\n \"alertThreshold\": true\r\n + \ },\r\n \"paceLength\": 10,\r\n \"percentage\": false,\r\n + \ \"pluginVersion\": \"7.5.5\",\r\n \"pointradius\": 2,\r\n \"points\": + false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": [],\r\n + \ \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"promtail_file_bytes_total + - promtail_read_bytes_total > 100000\",\r\n \"format\": \"time_series\",\r\n + \ \"hide\": false,\r\n \"interval\": \"\",\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"{{path}}\",\r\n \"refId\": \"A\"\r\n + \ },\r\n {\r\n \"expr\": \"promtail_file_bytes_total + - promtail_read_bytes_total < -100000\",\r\n \"format\": \"time_series\",\r\n + \ \"hide\": true,\r\n \"intervalFactor\": 1,\r\n \"legendFormat\": + \"{{path}}\",\r\n \"refId\": \"B\"\r\n }\r\n ],\r\n + \ \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Lag\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 2,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n 
{\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n },\r\n + \ {\r\n \"format\": \"short\",\r\n \"label\": null,\r\n + \ \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n }\r\n ],\r\n \"yaxis\": + {\r\n \"align\": false,\r\n \"alignLevel\": null\r\n }\r\n + \ }\r\n ],\r\n \"refresh\": \"5s\",\r\n \"schemaVersion\": 27,\r\n + \ \"style\": \"dark\",\r\n \"tags\": [],\r\n \"templating\": {\r\n \"list\": + []\r\n },\r\n \"time\": {\r\n \"from\": \"now-30m\",\r\n \"to\": + \"now\"\r\n },\r\n \"timepicker\": {\r\n \"refresh_intervals\": [\r\n + \ \"5s\",\r\n \"10s\",\r\n \"30s\",\r\n \"1m\",\r\n + \ \"5m\",\r\n \"15m\",\r\n \"30m\",\r\n \"1h\",\r\n + \ \"2h\",\r\n \"1d\"\r\n ],\r\n \"time_options\": [\r\n + \ \"5m\",\r\n \"15m\",\r\n \"1h\",\r\n \"6h\",\r\n + \ \"12h\",\r\n \"24h\",\r\n \"2d\",\r\n \"7d\",\r\n + \ \"30d\"\r\n ]\r\n },\r\n \"timezone\": \"\",\r\n \"title\": + \"Loki-Promtail\",\r\n \"uid\": \"htcadXCiz\",\r\n \"version\": 1\r\n }" diff --git a/src/terraform/modules/loki/crds/readme.md b/src/terraform/modules/loki/crds/readme.md new file mode 100644 index 00000000..224dab93 --- /dev/null +++ b/src/terraform/modules/loki/crds/readme.md @@ -0,0 +1,23 @@ +# Grafana dashboards + +## Loki-Promtail + +We can add a Loki-Promtail dashboard (ID: 10004) + +> We create the dashboard yaml as follows, just for reference in case we need to recreate it + +```powershell +kubectl create configmap loki-promtail-dashboard --from-file=loki-promtail-dashboard.json=./src/skaffold/crds/loki/loki-promtail-dashboard.json -n infrastructure -o yaml > ./src/skaffold/crds/loki/loki-promtail-dashboard.yaml +kubectl label --overwrite -f ./src/skaffold/crds/loki/loki-promtail-dashboard.yaml grafana_dashboard=1 +kubectl annotate --overwrite -f ./src/skaffold/crds/loki/loki-promtail-dashboard.yaml k8s-sidecar-target-directory=/tmp/dashboards/Infrastructure +``` + +## Loki stack 
monitoring + +We can add a Loki stack monitoring dashboard (ID: 14055) + +```powershell +kubectl create configmap loki-monitor-dashboard --from-file=loki-monitor-dashboard.json=./src/skaffold/crds/loki/loki-monitor-dashboard.json -n infrastructure -o yaml > ./src/skaffold/crds/loki/loki-monitor-dashboard.yaml +kubectl label --overwrite -f ./src/skaffold/crds/loki/loki-monitor-dashboard.yaml grafana_dashboard=1 +kubectl annotate --overwrite -f ./src/skaffold/crds/loki/loki-monitor-dashboard.yaml k8s-sidecar-target-directory=/tmp/dashboards/Infrastructure +``` diff --git a/src/terraform/modules/loki/loki-values.yaml b/src/terraform/modules/loki/loki-values.yaml new file mode 100644 index 00000000..8537dd28 --- /dev/null +++ b/src/terraform/modules/loki/loki-values.yaml @@ -0,0 +1,12 @@ +podAnnotations: + "consul.hashicorp.com/connect-inject": "true" + "consul.hashicorp.com/connect-service-port": "3100" + "consul.hashicorp.com/transparent-proxy": "true" + +tracing: + jaegerAgentHost: jaeger-agent.${namespace}.svc.cluster.local + +serviceMonitor: + enabled: true + additionalLabels: + release: "prometheus" diff --git a/src/terraform/modules/loki/main.tf b/src/terraform/modules/loki/main.tf new file mode 100644 index 00000000..ef9d80d6 --- /dev/null +++ b/src/terraform/modules/loki/main.tf @@ -0,0 +1,58 @@ +resource "helm_release" "loki" { + name = "loki" + + repository = "https://grafana.github.io/helm-charts" + chart = "loki" + namespace = var.namespace + version = "2.5.0" +// wait = true +// wait_for_jobs = true + values = [ + templatefile("${path.module}/loki-values.yaml", { + namespace = var.namespace + }) + ] +} + +resource "helm_release" "promtail" { + name = "promtail" + + repository = "https://grafana.github.io/helm-charts" + chart = "promtail" + namespace = var.namespace + version = "3.5.1" +// wait = true +// wait_for_jobs = true + values = [ + templatefile("${path.module}/promtail-values.yaml", { + namespace = var.namespace + }) + ] +} + +resource 
"time_sleep" "wait_10_seconds" { + depends_on = [ + helm_release.loki, + ] + + create_duration = "10s" +} + +resource "kubectl_manifest" "loki-monitor-dashboard" { + depends_on = [ + time_sleep.wait_10_seconds + ] + yaml_body = templatefile("${path.module}/crds/loki-monitor-dashboard.yaml", + { + namespace = var.namespace + }) +} + +resource "kubectl_manifest" "loki-promtail-dashboard" { + depends_on = [ + time_sleep.wait_10_seconds + ] + yaml_body = templatefile("${path.module}/crds/loki-promtail-dashboard.yaml",{ + namespace = var.namespace + }) +} diff --git a/src/terraform/modules/loki/promtail-values.yaml b/src/terraform/modules/loki/promtail-values.yaml new file mode 100644 index 00000000..a84bff2e --- /dev/null +++ b/src/terraform/modules/loki/promtail-values.yaml @@ -0,0 +1,11 @@ +config: + lokiAddress: http://loki.${namespace}.svc.cluster.local:3100/loki/api/v1/push + +podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/port: "http-metrics" + +serviceMonitor: + enabled: true + labels: + release: "prometheus" diff --git a/src/terraform/modules/loki/providers.tf b/src/terraform/modules/loki/providers.tf new file mode 100644 index 00000000..99459e5a --- /dev/null +++ b/src/terraform/modules/loki/providers.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 0.13" + required_providers { + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.7.0" + } + } +} diff --git a/src/terraform/modules/loki/variables.tf b/src/terraform/modules/loki/variables.tf new file mode 100644 index 00000000..779d2cf5 --- /dev/null +++ b/src/terraform/modules/loki/variables.tf @@ -0,0 +1,3 @@ +variable "namespace" { + type = string +} diff --git a/src/terraform/modules/prometheus/crds/jaeger-monitor.yaml b/src/terraform/modules/prometheus/crds/jaeger-monitor.yaml new file mode 100644 index 00000000..5210bf7f --- /dev/null +++ b/src/terraform/modules/prometheus/crds/jaeger-monitor.yaml @@ -0,0 +1,18 @@ +apiVersion: monitoring.coreos.com/v1 +kind: 
ServiceMonitor +metadata: + name: jaeger + namespace: ${namespace} + labels: + app: jaeger + release: prometheus +spec: + jobLabel: jaeger-metrics + selector: + matchLabels: + app: jaeger + namespaceSelector: + matchNames: + - ${namespace} + endpoints: + - port: admin-http diff --git a/src/terraform/modules/prometheus/crds/traefik-monitor.yaml b/src/terraform/modules/prometheus/crds/traefik-monitor.yaml new file mode 100644 index 00000000..c1d8cb64 --- /dev/null +++ b/src/terraform/modules/prometheus/crds/traefik-monitor.yaml @@ -0,0 +1,21 @@ + +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: traefik + namespace: ${namespace} + labels: + app: traefik + release: prometheus +spec: + jobLabel: traefik-metrics + selector: + matchLabels: + app.kubernetes.io/instance: traefik + app.kubernetes.io/name: traefik-dashboard + namespaceSelector: + matchNames: + - ${namespace} + endpoints: + - port: traefik + path: /metrics diff --git a/src/terraform/modules/prometheus/crds/traefik-rules.yaml b/src/terraform/modules/prometheus/crds/traefik-rules.yaml new file mode 100644 index 00000000..5b71f5e3 --- /dev/null +++ b/src/terraform/modules/prometheus/crds/traefik-rules.yaml @@ -0,0 +1,21 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + annotations: + meta.helm.sh/release-name: prometheus + meta.helm.sh/release-namespace: ${namespace} + labels: + app: kube-prometheus-stack + release: prometheus + name: traefik-alert-rules + namespace: ${namespace} +spec: + groups: + - name: Traefik + rules: + - alert: TooManyRequest + expr: avg(traefik_entrypoint_open_connections{job="traefik-dashboard",namespace="${namespace}"}) + > 5 + for: 1m + labels: + severity: critical diff --git a/src/terraform/modules/prometheus/main.tf b/src/terraform/modules/prometheus/main.tf new file mode 100644 index 00000000..737a6c9a --- /dev/null +++ b/src/terraform/modules/prometheus/main.tf @@ -0,0 +1,54 @@ +resource "helm_release" 
"prometheus-operator" { + name = "prometheus" + + repository = "https://prometheus-community.github.io/helm-charts" + chart = "kube-prometheus-stack" + namespace = var.namespace + version = "16.0.1" +// wait = true +// wait_for_jobs = true + values = [ + templatefile("${path.module}/prometheus-values.yaml", { + prometheus-domain = var.prometheus-domain-name, + namespace = var.namespace, + grafana-domain = var.grafana-domain-name, + }) + ] +} + +resource "time_sleep" "wait_10_seconds" { + depends_on = [ + helm_release.prometheus-operator, + ] + + create_duration = "10s" +} + +resource "kubectl_manifest" "jaeger-monitor" { + depends_on = [ + time_sleep.wait_10_seconds + ] + yaml_body = templatefile("${path.module}/crds/jaeger-monitor.yaml", { namespace = var.namespace }) # manifest namespace follows var.namespace instead of a hard-coded value +} + +resource "kubectl_manifest" "traefik-monitor" { + depends_on = [ + time_sleep.wait_10_seconds + ] + yaml_body = templatefile("${path.module}/crds/traefik-monitor.yaml", { namespace = var.namespace }) +} + +resource "kubectl_manifest" "traefik-rules" { + depends_on = [ + time_sleep.wait_10_seconds + ] + yaml_body = templatefile("${path.module}/crds/traefik-rules.yaml", { namespace = var.namespace }) +} + +output "prometheus-url" { + value = "https://${var.prometheus-domain-name}" +} + +output "grafana-url" { + value = "https://${var.grafana-domain-name}" +} \ No newline at end of file diff --git a/src/terraform/modules/prometheus/prometheus-values.yaml b/src/terraform/modules/prometheus/prometheus-values.yaml new file mode 100644 index 00000000..967d4fbf --- /dev/null +++ b/src/terraform/modules/prometheus/prometheus-values.yaml @@ -0,0 +1,67 @@ +prometheus-node-exporter: + hostRootFsMount: false + +kubeApiServer: + enabled: true + tlsConfig: + serverName: kubernetes + insecureSkipVerify: true + +prometheus: + prometheusSpec: + podMetadata: + annotations: + "consul.hashicorp.com/connect-inject": 'true' + 
"consul.hashicorp.com/connect-service": "prometheus-kube-prometheus-prometheus" + "consul.hashicorp.com/transparent-proxy": "true" + ingress: + enabled: true + annotations: + 
traefik.ingress.kubernetes.io/router.entrypoints: web, websecure + traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd + hosts: + - ${prometheus-domain} + path: / + pathType: Prefix + tls: + - secretName: traefik-cert + hosts: + - ${prometheus-domain} + + +grafana: + defaultDashboardsEnabled: true + sidecar: + dashboards: + searchNamespace: ALL + enabled: true + label: grafana_dashboard + folder: /tmp/dashboards + provider: + foldersFromFilesStructure: true + annotations: + k8s-sidecar-target-directory: /tmp/dashboards/Infrastructure/Kubernetes + ingress: + enabled: true + annotations: + traefik.ingress.kubernetes.io/router.entrypoints: web, websecure + traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd + hosts: + - ${grafana-domain} + path: / + pathType: Prefix + tls: + - secretName: traefik-cert + hosts: + - ${grafana-domain} + additionalDataSources: + - name: Loki + access: proxy + orgId: 1 + type: loki + url: http://loki.${namespace}.svc.cluster.local:3100 + version: 1 + plugins: + - grafana-piechart-panel + - digrich-bubblechart-panel + - grafana-clock-panel diff --git a/src/terraform/modules/prometheus/providers.tf b/src/terraform/modules/prometheus/providers.tf new file mode 100644 index 00000000..99459e5a --- /dev/null +++ b/src/terraform/modules/prometheus/providers.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 0.13" + required_providers { + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.7.0" + } + } +} diff --git a/src/terraform/modules/prometheus/variables.tf b/src/terraform/modules/prometheus/variables.tf new file mode 100644 index 00000000..a38af37a --- /dev/null +++ b/src/terraform/modules/prometheus/variables.tf @@ -0,0 +1,11 @@ +variable "prometheus-domain-name" { + type = string +} + +variable "namespace" { + type = string +} + +variable "grafana-domain-name" { + type = string +} diff --git a/src/terraform/modules/traefik/cert-store.yaml 
b/src/terraform/modules/traefik/cert-store.yaml new file mode 100644 index 00000000..7ff5a25f --- /dev/null +++ b/src/terraform/modules/traefik/cert-store.yaml @@ -0,0 +1,9 @@ +apiVersion: traefik.containo.us/v1alpha1 +kind: TLSStore +metadata: + name: default + namespace: ${namespace} + +spec: + defaultCertificate: + secretName: traefik-cert diff --git a/src/terraform/modules/traefik/crds/traefik-dashboard-service.yaml b/src/terraform/modules/traefik/crds/traefik-dashboard-service.yaml new file mode 100644 index 00000000..1271e01b --- /dev/null +++ b/src/terraform/modules/traefik/crds/traefik-dashboard-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: traefik-dashboard + namespace: ${namespace} + labels: + app.kubernetes.io/instance: traefik + app.kubernetes.io/name: traefik-dashboard +spec: + type: ClusterIP + ports: + - name: traefik + port: 9000 + targetPort: traefik + protocol: TCP + selector: + app.kubernetes.io/instance: traefik + app.kubernetes.io/name: traefik \ No newline at end of file diff --git a/src/terraform/modules/traefik/crds/traefik-grafana-dashboard.json b/src/terraform/modules/traefik/crds/traefik-grafana-dashboard.json new file mode 100644 index 00000000..9c24b375 --- /dev/null +++ b/src/terraform/modules/traefik/crds/traefik-grafana-dashboard.json @@ -0,0 +1,1416 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Simple dashboard for Traefik 2", + "editable": true, + "gnetId": 11462, + "graphTooltip": 0, + "id": 43, + "iteration": 1619877456583, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + 
"format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 22, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "time() - process_start_time_seconds{job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#37872D", + "#37872D", + "#C4162A" + ], + "datasource": "Prometheus", + "decimals": 0, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 3, + "y": 0 + }, + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "200%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", 
+ "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(increase(traefik_service_requests_total{code=\"404\",protocol=~\"$protocol\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "traefik_requests_total", + "refId": "A", + "step": 60 + } + ], + "thresholds": "0,1", + "title": "404 Error Count", + "type": "singlestat", + "valueFontSize": "200%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "max" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#37872D", + "#C4162A", + "#C4162A" + ], + "datasource": "Prometheus", + "decimals": 0, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 6, + "y": 0 + }, + "id": 32, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "200%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false, + "ymax": null, + "ymin": null + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(increase(traefik_service_requests_total{code=\"503\",protocol=~\"$protocol\"}[$interval]))", + "refId": "A" + } + ], + "thresholds": "0,1", + "timeFrom": null, + "timeShift": null, + "title": "503 Error count", + "type": "singlestat", 
+ "valueFontSize": "200%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "max" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 6, + "w": 4, + "x": 9, + "y": 0 + }, + "id": 18, + "interval": null, + "legend": { + "percentage": true, + "show": true, + "sort": null, + "sortDesc": null, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "topk(5, sum(traefik_service_requests_total{protocol=~\"$protocol\"}) by (code))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{code}}", + "refId": "A" + } + ], + "title": "Top 5 $protocol return code", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 13, + "y": 0 + }, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + 
"full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(traefik_entrypoint_request_duration_seconds_sum) / sum(traefik_entrypoint_requests_total) * 1000", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Average response time", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 7, + "x": 17, + "y": 0 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_service_request_duration_seconds_sum{protocol=~\"$protocol\"}) / sum(traefik_entrypoint_requests_total{protocol=~\"$protocol\"}) * 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average response time (ms)", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average response time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 10, + 
"max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(traefik_service_requests_total{code=\"404\",method=\"GET\",protocol=~\"$protocol\"}[$interval])) by (service)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{service}} ", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bad Status Code Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + 
"aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "hideTimeOverride": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_service_requests_total[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Total requests", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 8, + 
"legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "process_open_fds{job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Used sockets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 14, + "x": 10, + "y": 12 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": 
false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_service_requests_total{protocol=~\"http|https\"}[$interval])) by (service)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{service}} ", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Access to services", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 7, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 18 + }, + "hiddenSeries": false, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_entrypoint_open_connections) by (method)", + "format": "time_series", + "intervalFactor": 
1, + "legendFormat": "{{ method }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "ENTRYPOINT - Open Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 7, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 18 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_service_open_connections) by (method)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ method }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SERVICE - Open Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 
null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 25 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/^[^234].*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(traefik_service_requests_total{protocol=~\"$protocol\"}[$interval])) by (code)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{code}}", + "refId": "A", + "step": 120 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Status Code Count ", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "traefik", + "load-balancer", + "docker", + "prometheus" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "traefik-dashboard", + "value": "traefik-dashboard" + }, + "datasource": "Prometheus", + "definition": "", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Job:", + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(job)", + "refId": "Prometheus-job-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(traefik_service_requests_total, protocol)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Service:", + "multi": true, + "name": "protocol", + "options": [], + "query": { + "query": "label_values(traefik_service_requests_total, protocol)", + "refId": "Prometheus-protocol-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + "description": null, + "error": null, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": 
"1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Traefik 2", + "uid": "3ipsWfViz", + "version": 2 + } \ No newline at end of file diff --git a/src/terraform/modules/traefik/crds/traefik-grafana-dashboard.yaml b/src/terraform/modules/traefik/crds/traefik-grafana-dashboard.yaml new file mode 100644 index 00000000..7e3c63f4 --- /dev/null +++ b/src/terraform/modules/traefik/crds/traefik-grafana-dashboard.yaml @@ -0,0 +1,483 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: traefik-dashboard + namespace: ${namespace} + labels: + grafana_dashboard: "1" + annotations: + k8s-sidecar-target-directory: /tmp/dashboards/Infrastructure +data: + traefik-dashboard.json: "{\r\n \"annotations\": {\r\n \"list\": [\r\n {\r\n + \ \"builtIn\": 1,\r\n \"datasource\": \"-- Grafana --\",\r\n + \ \"enable\": true,\r\n \"hide\": true,\r\n \"iconColor\": + \"rgba(0, 211, 255, 1)\",\r\n \"name\": \"Annotations & Alerts\",\r\n + \ \"type\": \"dashboard\"\r\n }\r\n ]\r\n },\r\n \"description\": + \"Simple dashboard for Traefik 2\",\r\n 
\"editable\": true,\r\n \"gnetId\": + 11462,\r\n \"graphTooltip\": 0,\r\n \"id\": 43,\r\n \"iteration\": 1619877456583,\r\n + \ \"links\": [],\r\n \"panels\": [\r\n {\r\n \"cacheTimeout\": + null,\r\n \"colorBackground\": false,\r\n \"colorValue\": false,\r\n + \ \"colors\": [\r\n \"#299c46\",\r\n \"rgba(237, 129, + 40, 0.89)\",\r\n \"#d44a3a\"\r\n ],\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {},\r\n + \ \"overrides\": []\r\n },\r\n \"format\": \"s\",\r\n \"gauge\": + {\r\n \"maxValue\": 100,\r\n \"minValue\": 0,\r\n \"show\": + false,\r\n \"thresholdLabels\": false,\r\n \"thresholdMarkers\": + true\r\n },\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": + 3,\r\n \"x\": 0,\r\n \"y\": 0\r\n },\r\n \"id\": + 22,\r\n \"interval\": null,\r\n \"links\": [],\r\n \"mappingType\": + 1,\r\n \"mappingTypes\": [\r\n {\r\n \"name\": \"value + to text\",\r\n \"value\": 1\r\n },\r\n {\r\n \"name\": + \"range to text\",\r\n \"value\": 2\r\n }\r\n ],\r\n + \ \"maxDataPoints\": 100,\r\n \"nullPointMode\": \"connected\",\r\n + \ \"nullText\": null,\r\n \"postfix\": \"\",\r\n \"postfixFontSize\": + \"50%\",\r\n \"prefix\": \"\",\r\n \"prefixFontSize\": \"50%\",\r\n + \ \"rangeMaps\": [\r\n {\r\n \"from\": \"null\",\r\n + \ \"text\": \"N/A\",\r\n \"to\": \"null\"\r\n }\r\n + \ ],\r\n \"sparkline\": {\r\n \"fillColor\": \"rgba(31, + 118, 189, 0.18)\",\r\n \"full\": false,\r\n \"lineColor\": \"rgb(31, + 120, 193)\",\r\n \"show\": false\r\n },\r\n \"tableColumn\": + \"\",\r\n \"targets\": [\r\n {\r\n \"expr\": \"time() + - process_start_time_seconds{job=\\\"$job\\\"}\",\r\n \"format\": \"time_series\",\r\n + \ \"intervalFactor\": 2,\r\n \"refId\": \"A\"\r\n }\r\n + \ ],\r\n \"thresholds\": \"\",\r\n \"title\": \"Uptime\",\r\n + \ \"type\": \"singlestat\",\r\n \"valueFontSize\": \"80%\",\r\n \"valueMaps\": + [\r\n {\r\n \"op\": \"=\",\r\n \"text\": \"N/A\",\r\n + \ \"value\": \"null\"\r\n }\r\n ],\r\n \"valueName\": + \"current\"\r\n },\r\n {\r\n 
\"cacheTimeout\": null,\r\n \"colorBackground\": + false,\r\n \"colorValue\": true,\r\n \"colors\": [\r\n \"#37872D\",\r\n + \ \"#37872D\",\r\n \"#C4162A\"\r\n ],\r\n \"datasource\": + \"Prometheus\",\r\n \"decimals\": 0,\r\n \"fieldConfig\": {\r\n + \ \"defaults\": {},\r\n \"overrides\": []\r\n },\r\n \"format\": + \"none\",\r\n \"gauge\": {\r\n \"maxValue\": 100,\r\n \"minValue\": + 0,\r\n \"show\": false,\r\n \"thresholdLabels\": false,\r\n + \ \"thresholdMarkers\": true\r\n },\r\n \"gridPos\": {\r\n + \ \"h\": 6,\r\n \"w\": 3,\r\n \"x\": 3,\r\n \"y\": + 0\r\n },\r\n \"id\": 26,\r\n \"interval\": null,\r\n \"links\": + [],\r\n \"mappingType\": 1,\r\n \"mappingTypes\": [\r\n {\r\n + \ \"name\": \"value to text\",\r\n \"value\": 1\r\n },\r\n + \ {\r\n \"name\": \"range to text\",\r\n \"value\": + 2\r\n }\r\n ],\r\n \"maxDataPoints\": 100,\r\n \"nullPointMode\": + \"connected\",\r\n \"nullText\": null,\r\n \"postfix\": \"\",\r\n + \ \"postfixFontSize\": \"50%\",\r\n \"prefix\": \"\",\r\n \"prefixFontSize\": + \"200%\",\r\n \"rangeMaps\": [\r\n {\r\n \"from\": + \"null\",\r\n \"text\": \"N/A\",\r\n \"to\": \"null\"\r\n + \ }\r\n ],\r\n \"sparkline\": {\r\n \"fillColor\": + \"rgba(31, 118, 189, 0.18)\",\r\n \"full\": false,\r\n \"lineColor\": + \"rgb(31, 120, 193)\",\r\n \"show\": false\r\n },\r\n \"tableColumn\": + \"\",\r\n \"targets\": [\r\n {\r\n \"expr\": \"sum(increase(traefik_service_requests_total{code=\\\"404\\\",protocol=~\\\"$protocol\\\"}[$interval]))\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 2,\r\n + \ \"legendFormat\": \"\",\r\n \"metric\": \"traefik_requests_total\",\r\n + \ \"refId\": \"A\",\r\n \"step\": 60\r\n }\r\n ],\r\n + \ \"thresholds\": \"0,1\",\r\n \"title\": \"404 Error Count\",\r\n + \ \"type\": \"singlestat\",\r\n \"valueFontSize\": \"200%\",\r\n + \ \"valueMaps\": [\r\n {\r\n \"op\": \"=\",\r\n \"text\": + \"N/A\",\r\n \"value\": \"null\"\r\n }\r\n ],\r\n \"valueName\": + \"max\"\r\n },\r\n {\r\n \"cacheTimeout\": 
null,\r\n \"colorBackground\": + false,\r\n \"colorValue\": true,\r\n \"colors\": [\r\n \"#37872D\",\r\n + \ \"#C4162A\",\r\n \"#C4162A\"\r\n ],\r\n \"datasource\": + \"Prometheus\",\r\n \"decimals\": 0,\r\n \"fieldConfig\": {\r\n + \ \"defaults\": {},\r\n \"overrides\": []\r\n },\r\n \"format\": + \"none\",\r\n \"gauge\": {\r\n \"maxValue\": 100,\r\n \"minValue\": + 0,\r\n \"show\": false,\r\n \"thresholdLabels\": false,\r\n + \ \"thresholdMarkers\": true\r\n },\r\n \"gridPos\": {\r\n + \ \"h\": 6,\r\n \"w\": 3,\r\n \"x\": 6,\r\n \"y\": + 0\r\n },\r\n \"id\": 32,\r\n \"interval\": null,\r\n \"links\": + [],\r\n \"mappingType\": 1,\r\n \"mappingTypes\": [\r\n {\r\n + \ \"name\": \"value to text\",\r\n \"value\": 1\r\n },\r\n + \ {\r\n \"name\": \"range to text\",\r\n \"value\": + 2\r\n }\r\n ],\r\n \"maxDataPoints\": 100,\r\n \"nullPointMode\": + \"connected\",\r\n \"nullText\": null,\r\n \"postfix\": \"\",\r\n + \ \"postfixFontSize\": \"50%\",\r\n \"prefix\": \"\",\r\n \"prefixFontSize\": + \"200%\",\r\n \"rangeMaps\": [\r\n {\r\n \"from\": + \"null\",\r\n \"text\": \"N/A\",\r\n \"to\": \"null\"\r\n + \ }\r\n ],\r\n \"sparkline\": {\r\n \"fillColor\": + \"rgba(31, 118, 189, 0.18)\",\r\n \"full\": false,\r\n \"lineColor\": + \"rgb(31, 120, 193)\",\r\n \"show\": false,\r\n \"ymax\": null,\r\n + \ \"ymin\": null\r\n },\r\n \"tableColumn\": \"\",\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(increase(traefik_service_requests_total{code=\\\"503\\\",protocol=~\\\"$protocol\\\"}[$interval]))\",\r\n + \ \"refId\": \"A\"\r\n }\r\n ],\r\n \"thresholds\": + \"0,1\",\r\n \"timeFrom\": null,\r\n \"timeShift\": null,\r\n \"title\": + \"503 Error count\",\r\n \"type\": \"singlestat\",\r\n \"valueFontSize\": + \"200%\",\r\n \"valueMaps\": [\r\n {\r\n \"op\": \"=\",\r\n + \ \"text\": \"N/A\",\r\n \"value\": \"null\"\r\n }\r\n + \ ],\r\n \"valueName\": \"max\"\r\n },\r\n {\r\n \"aliasColors\": + {},\r\n \"breakPoint\": \"50%\",\r\n \"cacheTimeout\": null,\r\n + \ \"combine\": 
{\r\n \"label\": \"Others\",\r\n \"threshold\": + 0\r\n },\r\n \"datasource\": \"Prometheus\",\r\n \"fieldConfig\": + {\r\n \"defaults\": {},\r\n \"overrides\": []\r\n },\r\n + \ \"fontSize\": \"80%\",\r\n \"format\": \"short\",\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 4,\r\n \"x\": 9,\r\n \"y\": + 0\r\n },\r\n \"id\": 18,\r\n \"interval\": null,\r\n \"legend\": + {\r\n \"percentage\": true,\r\n \"show\": true,\r\n \"sort\": + null,\r\n \"sortDesc\": null,\r\n \"values\": true\r\n },\r\n + \ \"legendType\": \"Right side\",\r\n \"links\": [],\r\n \"maxDataPoints\": + 3,\r\n \"nullPointMode\": \"connected\",\r\n \"pieType\": \"pie\",\r\n + \ \"strokeWidth\": 1,\r\n \"targets\": [\r\n {\r\n \"expr\": + \"topk(5, sum(traefik_service_requests_total{protocol=~\\\"$protocol\\\"}) by + (code))\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 2,\r\n \"legendFormat\": \"{{code}}\",\r\n \"refId\": \"A\"\r\n + \ }\r\n ],\r\n \"title\": \"Top 5 $protocol return code\",\r\n + \ \"type\": \"grafana-piechart-panel\",\r\n \"valueName\": \"current\"\r\n + \ },\r\n {\r\n \"cacheTimeout\": null,\r\n \"colorBackground\": + false,\r\n \"colorValue\": false,\r\n \"colors\": [\r\n \"#299c46\",\r\n + \ \"rgba(237, 129, 40, 0.89)\",\r\n \"#d44a3a\"\r\n ],\r\n + \ \"datasource\": \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": + {},\r\n \"overrides\": []\r\n },\r\n \"format\": \"ms\",\r\n + \ \"gauge\": {\r\n \"maxValue\": 100,\r\n \"minValue\": + 0,\r\n \"show\": false,\r\n \"thresholdLabels\": false,\r\n + \ \"thresholdMarkers\": true\r\n },\r\n \"gridPos\": {\r\n + \ \"h\": 6,\r\n \"w\": 4,\r\n \"x\": 13,\r\n \"y\": + 0\r\n },\r\n \"id\": 20,\r\n \"interval\": null,\r\n \"links\": + [],\r\n \"mappingType\": 1,\r\n \"mappingTypes\": [\r\n {\r\n + \ \"name\": \"value to text\",\r\n \"value\": 1\r\n },\r\n + \ {\r\n \"name\": \"range to text\",\r\n \"value\": + 2\r\n }\r\n ],\r\n \"maxDataPoints\": 100,\r\n \"nullPointMode\": + \"connected\",\r\n \"nullText\": 
null,\r\n \"postfix\": \"\",\r\n + \ \"postfixFontSize\": \"50%\",\r\n \"prefix\": \"\",\r\n \"prefixFontSize\": + \"50%\",\r\n \"rangeMaps\": [\r\n {\r\n \"from\": \"null\",\r\n + \ \"text\": \"N/A\",\r\n \"to\": \"null\"\r\n }\r\n + \ ],\r\n \"sparkline\": {\r\n \"fillColor\": \"rgba(31, + 118, 189, 0.18)\",\r\n \"full\": false,\r\n \"lineColor\": \"rgb(31, + 120, 193)\",\r\n \"show\": true\r\n },\r\n \"tableColumn\": + \"\",\r\n \"targets\": [\r\n {\r\n \"expr\": \"sum(traefik_entrypoint_request_duration_seconds_sum) + / sum(traefik_entrypoint_requests_total) * 1000\",\r\n \"format\": + \"time_series\",\r\n \"intervalFactor\": 2,\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"thresholds\": \"\",\r\n \"title\": + \"Average response time\",\r\n \"type\": \"singlestat\",\r\n \"valueFontSize\": + \"80%\",\r\n \"valueMaps\": [\r\n {\r\n \"op\": \"=\",\r\n + \ \"text\": \"N/A\",\r\n \"value\": \"null\"\r\n }\r\n + \ ],\r\n \"valueName\": \"avg\"\r\n },\r\n {\r\n \"aliasColors\": + {},\r\n \"bars\": false,\r\n \"dashLength\": 10,\r\n \"dashes\": + false,\r\n \"datasource\": \"Prometheus\",\r\n \"fieldConfig\": + {\r\n \"defaults\": {\r\n \"links\": []\r\n },\r\n + \ \"overrides\": []\r\n },\r\n \"fill\": 1,\r\n \"fillGradient\": + 0,\r\n \"gridPos\": {\r\n \"h\": 6,\r\n \"w\": 7,\r\n + \ \"x\": 17,\r\n \"y\": 0\r\n },\r\n \"hiddenSeries\": + false,\r\n \"id\": 14,\r\n \"legend\": {\r\n \"avg\": false,\r\n + \ \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"show\": true,\r\n \"total\": false,\r\n \"values\": + false\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"percentage\": + false,\r\n \"pluginVersion\": \"7.5.3\",\r\n \"pointradius\": 5,\r\n + \ \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": + [],\r\n \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n 
\"expr\": \"sum(traefik_service_request_duration_seconds_sum{protocol=~\\\"$protocol\\\"}) + / sum(traefik_entrypoint_requests_total{protocol=~\\\"$protocol\\\"}) * 1000\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 2,\r\n + \ \"legendFormat\": \"Average response time (ms)\",\r\n \"refId\": + \"A\",\r\n \"step\": 240\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Average response time\",\r\n \"tooltip\": + {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"format\": \"ms\",\r\n \"label\": + null,\r\n \"logBase\": 10,\r\n \"max\": null,\r\n \"min\": + \"0\",\r\n \"show\": true\r\n },\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"aliasColors\": {},\r\n \"bars\": false,\r\n \"dashLength\": + 10,\r\n \"dashes\": false,\r\n \"datasource\": \"Prometheus\",\r\n + \ \"decimals\": 0,\r\n \"fieldConfig\": {\r\n \"defaults\": + {\r\n \"links\": []\r\n },\r\n \"overrides\": []\r\n + \ },\r\n \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 12,\r\n \"x\": 0,\r\n \"y\": + 6\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 10,\r\n + \ \"legend\": {\r\n \"alignAsTable\": true,\r\n \"avg\": + false,\r\n \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"rightSide\": true,\r\n \"show\": true,\r\n \"total\": + false,\r\n \"values\": false\r\n },\r\n \"lines\": true,\r\n + \ \"linewidth\": 1,\r\n \"links\": [],\r\n \"nullPointMode\": + \"null\",\r\n \"options\": {\r\n \"alertThreshold\": true\r\n + \ },\r\n 
\"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(increase(traefik_service_requests_total{code=\\\"404\\\",method=\\\"GET\\\",protocol=~\\\"$protocol\\\"}[$interval])) + by (service)\",\r\n \"format\": \"time_series\",\r\n \"interval\": + \"\",\r\n \"intervalFactor\": 2,\r\n \"legendFormat\": \"{{service}} + \",\r\n \"refId\": \"A\",\r\n \"step\": 240\r\n }\r\n + \ ],\r\n \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Bad Status Code Count\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"decimals\": + 0,\r\n \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": false\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + true,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {\r\n + \ \"links\": []\r\n },\r\n \"overrides\": []\r\n },\r\n + \ \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n + \ \"h\": 6,\r\n \"w\": 12,\r\n \"x\": 12,\r\n \"y\": + 6\r\n },\r\n \"hiddenSeries\": false,\r\n \"hideTimeOverride\": + false,\r\n \"id\": 4,\r\n \"legend\": {\r\n \"alignAsTable\": + true,\r\n \"avg\": true,\r\n \"current\": false,\r\n 
\"max\": + true,\r\n \"min\": true,\r\n \"rightSide\": true,\r\n \"show\": + true,\r\n \"total\": false,\r\n \"values\": true\r\n },\r\n + \ \"lines\": false,\r\n \"linewidth\": 1,\r\n \"links\": [],\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.3\",\r\n \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(traefik_service_requests_total[$interval]))\",\r\n + \ \"format\": \"time_series\",\r\n \"intervalFactor\": 2,\r\n + \ \"legendFormat\": \"Total requests\",\r\n \"refId\": \"A\"\r\n + \ }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"Total requests\",\r\n \"tooltip\": {\r\n \"shared\": true,\r\n + \ \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n },\r\n + \ \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"decimals\": 0,\r\n \"format\": \"short\",\r\n \"label\": + null,\r\n \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n },\r\n {\r\n \"decimals\": + 0,\r\n \"format\": \"short\",\r\n \"label\": \"\",\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"aliasColors\": {},\r\n \"bars\": false,\r\n \"dashLength\": + 10,\r\n \"dashes\": false,\r\n \"datasource\": \"Prometheus\",\r\n + \ \"fieldConfig\": {\r\n \"defaults\": {\r\n \"links\": + []\r\n },\r\n \"overrides\": []\r\n },\r\n \"fill\": + 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n \"h\": + 6,\r\n \"w\": 10,\r\n \"x\": 0,\r\n \"y\": 12\r\n 
},\r\n + \ \"hiddenSeries\": false,\r\n \"id\": 8,\r\n \"legend\": + {\r\n \"avg\": false,\r\n \"current\": false,\r\n \"max\": + false,\r\n \"min\": false,\r\n \"show\": true,\r\n \"total\": + false,\r\n \"values\": false\r\n },\r\n \"lines\": true,\r\n + \ \"linewidth\": 1,\r\n \"links\": [],\r\n \"nullPointMode\": + \"null as zero\",\r\n \"options\": {\r\n \"alertThreshold\": true\r\n + \ },\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": true,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"process_open_fds{job=~\\\"$job\\\"}\",\r\n + \ \"format\": \"time_series\",\r\n \"interval\": \"\",\r\n + \ \"intervalFactor\": 2,\r\n \"legendFormat\": \"{{ instance + }}\",\r\n \"refId\": \"A\",\r\n \"step\": 240\r\n }\r\n + \ ],\r\n \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Used sockets\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": true\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": {},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"decimals\": 0,\r\n \"fieldConfig\": {\r\n + \ \"defaults\": {\r\n \"links\": []\r\n },\r\n \"overrides\": + []\r\n },\r\n 
\"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": + {\r\n \"h\": 6,\r\n \"w\": 14,\r\n \"x\": 10,\r\n \"y\": + 12\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 24,\r\n + \ \"legend\": {\r\n \"alignAsTable\": true,\r\n \"avg\": + true,\r\n \"current\": true,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"rightSide\": true,\r\n \"show\": true,\r\n \"total\": + true,\r\n \"values\": true\r\n },\r\n \"lines\": true,\r\n + \ \"linewidth\": 1,\r\n \"links\": [],\r\n \"nullPointMode\": + \"null\",\r\n \"options\": {\r\n \"alertThreshold\": true\r\n + \ },\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": false,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(rate(traefik_service_requests_total{protocol=~\\\"http|https\\\"}[$interval])) + by (service)\",\r\n \"format\": \"time_series\",\r\n \"interval\": + \"\",\r\n \"intervalFactor\": 2,\r\n \"legendFormat\": \"{{service}} + \",\r\n \"refId\": \"A\",\r\n \"step\": 240\r\n }\r\n + \ ],\r\n \"thresholds\": [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": + [],\r\n \"timeShift\": null,\r\n \"title\": \"Access to services\",\r\n + \ \"tooltip\": {\r\n \"shared\": true,\r\n \"sort\": 0,\r\n + \ \"value_type\": \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n + \ \"xaxis\": {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n + \ \"name\": null,\r\n \"show\": true,\r\n \"values\": + []\r\n },\r\n \"yaxes\": [\r\n {\r\n \"decimals\": + 0,\r\n \"format\": \"short\",\r\n \"label\": null,\r\n \"logBase\": + 1,\r\n \"max\": null,\r\n \"min\": null,\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": false\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n },\r\n {\r\n \"aliasColors\": 
{},\r\n \"bars\": + false,\r\n \"dashLength\": 10,\r\n \"dashes\": false,\r\n \"datasource\": + \"Prometheus\",\r\n \"fieldConfig\": {\r\n \"defaults\": {\r\n + \ \"links\": []\r\n },\r\n \"overrides\": []\r\n },\r\n + \ \"fill\": 7,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n + \ \"h\": 7,\r\n \"w\": 12,\r\n \"x\": 0,\r\n \"y\": + 18\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 30,\r\n + \ \"legend\": {\r\n \"alignAsTable\": true,\r\n \"avg\": + true,\r\n \"current\": true,\r\n \"max\": true,\r\n \"min\": + false,\r\n \"rightSide\": true,\r\n \"show\": true,\r\n \"sort\": + \"avg\",\r\n \"sortDesc\": true,\r\n \"total\": false,\r\n \"values\": + true\r\n },\r\n \"lines\": true,\r\n \"linewidth\": 1,\r\n + \ \"links\": [],\r\n \"nullPointMode\": \"null\",\r\n \"options\": + {\r\n \"alertThreshold\": true\r\n },\r\n \"percentage\": + false,\r\n \"pluginVersion\": \"7.5.3\",\r\n \"pointradius\": 5,\r\n + \ \"points\": false,\r\n \"renderer\": \"flot\",\r\n \"seriesOverrides\": + [],\r\n \"spaceLength\": 10,\r\n \"stack\": true,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"sum(traefik_entrypoint_open_connections) + by (method)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"{{ method }}\",\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"ENTRYPOINT - Open Connections\",\r\n \"tooltip\": {\r\n \"shared\": + true,\r\n \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"decimals\": 0,\r\n \"format\": \"short\",\r\n \"label\": + null,\r\n \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n },\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + 
\ \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"aliasColors\": {},\r\n \"bars\": false,\r\n \"dashLength\": + 10,\r\n \"dashes\": false,\r\n \"datasource\": \"Prometheus\",\r\n + \ \"fieldConfig\": {\r\n \"defaults\": {\r\n \"links\": + []\r\n },\r\n \"overrides\": []\r\n },\r\n \"fill\": + 7,\r\n \"fillGradient\": 0,\r\n \"gridPos\": {\r\n \"h\": + 7,\r\n \"w\": 12,\r\n \"x\": 12,\r\n \"y\": 18\r\n + \ },\r\n \"hiddenSeries\": false,\r\n \"id\": 28,\r\n \"legend\": + {\r\n \"alignAsTable\": true,\r\n \"avg\": true,\r\n \"current\": + true,\r\n \"max\": true,\r\n \"min\": false,\r\n \"rightSide\": + true,\r\n \"show\": true,\r\n \"sort\": \"avg\",\r\n \"sortDesc\": + true,\r\n \"total\": false,\r\n \"values\": true\r\n },\r\n + \ \"lines\": true,\r\n \"linewidth\": 1,\r\n \"links\": [],\r\n + \ \"nullPointMode\": \"null\",\r\n \"options\": {\r\n \"alertThreshold\": + true\r\n },\r\n \"percentage\": false,\r\n \"pluginVersion\": + \"7.5.3\",\r\n \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [],\r\n \"spaceLength\": 10,\r\n + \ \"stack\": true,\r\n \"steppedLine\": false,\r\n \"targets\": + [\r\n {\r\n \"expr\": \"sum(traefik_service_open_connections) + by (method)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 1,\r\n \"legendFormat\": \"{{ method }}\",\r\n \"refId\": + \"A\"\r\n }\r\n ],\r\n \"thresholds\": [],\r\n \"timeFrom\": + null,\r\n \"timeRegions\": [],\r\n \"timeShift\": null,\r\n \"title\": + \"SERVICE - Open Connections\",\r\n \"tooltip\": {\r\n \"shared\": + true,\r\n \"sort\": 0,\r\n \"value_type\": \"individual\"\r\n + \ },\r\n \"type\": \"graph\",\r\n \"xaxis\": {\r\n \"buckets\": + null,\r\n \"mode\": \"time\",\r\n \"name\": null,\r\n \"show\": + true,\r\n \"values\": []\r\n },\r\n \"yaxes\": [\r\n {\r\n + \ \"decimals\": 0,\r\n \"format\": \"short\",\r\n \"label\": + 
null,\r\n \"logBase\": 1,\r\n \"max\": null,\r\n \"min\": + null,\r\n \"show\": true\r\n },\r\n {\r\n \"format\": + \"short\",\r\n \"label\": null,\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": null,\r\n \"show\": + false\r\n }\r\n ],\r\n \"yaxis\": {\r\n \"align\": + false,\r\n \"alignLevel\": null\r\n }\r\n },\r\n {\r\n + \ \"aliasColors\": {},\r\n \"bars\": false,\r\n \"dashLength\": + 10,\r\n \"dashes\": false,\r\n \"datasource\": \"Prometheus\",\r\n + \ \"decimals\": 0,\r\n \"fieldConfig\": {\r\n \"defaults\": + {\r\n \"links\": []\r\n },\r\n \"overrides\": []\r\n + \ },\r\n \"fill\": 1,\r\n \"fillGradient\": 0,\r\n \"gridPos\": + {\r\n \"h\": 7,\r\n \"w\": 24,\r\n \"x\": 0,\r\n \"y\": + 25\r\n },\r\n \"hiddenSeries\": false,\r\n \"id\": 12,\r\n + \ \"legend\": {\r\n \"alignAsTable\": true,\r\n \"avg\": + false,\r\n \"current\": false,\r\n \"max\": false,\r\n \"min\": + false,\r\n \"rightSide\": true,\r\n \"show\": true,\r\n \"total\": + false,\r\n \"values\": false\r\n },\r\n \"lines\": true,\r\n + \ \"linewidth\": 1,\r\n \"links\": [],\r\n \"nullPointMode\": + \"null\",\r\n \"options\": {\r\n \"alertThreshold\": true\r\n + \ },\r\n \"percentage\": false,\r\n \"pluginVersion\": \"7.5.3\",\r\n + \ \"pointradius\": 5,\r\n \"points\": false,\r\n \"renderer\": + \"flot\",\r\n \"seriesOverrides\": [\r\n {\r\n \"alias\": + \"/^[^234].*/\",\r\n \"transform\": \"negative-Y\"\r\n }\r\n + \ ],\r\n \"spaceLength\": 10,\r\n \"stack\": false,\r\n \"steppedLine\": + false,\r\n \"targets\": [\r\n {\r\n \"expr\": \"sum(increase(traefik_service_requests_total{protocol=~\\\"$protocol\\\"}[$interval])) + by (code)\",\r\n \"format\": \"time_series\",\r\n \"intervalFactor\": + 2,\r\n \"legendFormat\": \"{{code}}\",\r\n \"refId\": \"A\",\r\n + \ \"step\": 120\r\n }\r\n ],\r\n \"thresholds\": + [],\r\n \"timeFrom\": null,\r\n \"timeRegions\": [],\r\n \"timeShift\": + null,\r\n \"title\": \"Status Code Count \",\r\n \"tooltip\": {\r\n + \ \"shared\": true,\r\n \"sort\": 
0,\r\n \"value_type\": + \"individual\"\r\n },\r\n \"type\": \"graph\",\r\n \"xaxis\": + {\r\n \"buckets\": null,\r\n \"mode\": \"time\",\r\n \"name\": + null,\r\n \"show\": true,\r\n \"values\": []\r\n },\r\n + \ \"yaxes\": [\r\n {\r\n \"decimals\": 0,\r\n \"format\": + \"short\",\r\n \"label\": \"\",\r\n \"logBase\": 1,\r\n + \ \"max\": null,\r\n \"min\": \"0\",\r\n \"show\": + true\r\n },\r\n {\r\n \"format\": \"short\",\r\n + \ \"label\": null,\r\n \"logBase\": 1,\r\n \"max\": + null,\r\n \"min\": null,\r\n \"show\": false\r\n }\r\n + \ ],\r\n \"yaxis\": {\r\n \"align\": false,\r\n \"alignLevel\": + null\r\n }\r\n }\r\n ],\r\n \"refresh\": false,\r\n \"schemaVersion\": + 27,\r\n \"style\": \"dark\",\r\n \"tags\": [\r\n \"traefik\",\r\n \"load-balancer\",\r\n + \ \"docker\",\r\n \"prometheus\"\r\n ],\r\n \"templating\": {\r\n + \ \"list\": [\r\n {\r\n \"allValue\": null,\r\n \"current\": + {\r\n \"selected\": false,\r\n \"text\": \"traefik-dashboard\",\r\n + \ \"value\": \"traefik-dashboard\"\r\n },\r\n \"datasource\": + \"Prometheus\",\r\n \"definition\": \"\",\r\n \"description\": + null,\r\n \"error\": null,\r\n \"hide\": 0,\r\n \"includeAll\": + false,\r\n \"label\": \"Job:\",\r\n \"multi\": false,\r\n \"name\": + \"job\",\r\n \"options\": [],\r\n \"query\": {\r\n \"query\": + \"label_values(job)\",\r\n \"refId\": \"Prometheus-job-Variable-Query\"\r\n + \ },\r\n \"refresh\": 1,\r\n \"regex\": \"\",\r\n \"skipUrlSync\": + false,\r\n \"sort\": 2,\r\n \"tagValuesQuery\": \"\",\r\n \"tags\": + [],\r\n \"tagsQuery\": \"\",\r\n \"type\": \"query\",\r\n \"useTags\": + false\r\n },\r\n {\r\n \"allValue\": null,\r\n \"current\": + {\r\n \"selected\": false,\r\n \"text\": \"All\",\r\n \"value\": + \"$__all\"\r\n },\r\n \"datasource\": \"Prometheus\",\r\n \"definition\": + \"label_values(traefik_service_requests_total, protocol)\",\r\n \"description\": + null,\r\n \"error\": null,\r\n \"hide\": 0,\r\n \"includeAll\": + true,\r\n \"label\": \"Service:\",\r\n \"multi\": 
true,\r\n + \ \"name\": \"protocol\",\r\n \"options\": [],\r\n \"query\": + {\r\n \"query\": \"label_values(traefik_service_requests_total, protocol)\",\r\n + \ \"refId\": \"Prometheus-protocol-Variable-Query\"\r\n },\r\n + \ \"refresh\": 1,\r\n \"regex\": \"\",\r\n \"skipUrlSync\": + false,\r\n \"sort\": 0,\r\n \"tagValuesQuery\": \"\",\r\n \"tags\": + [],\r\n \"tagsQuery\": \"\",\r\n \"type\": \"query\",\r\n \"useTags\": + false\r\n },\r\n {\r\n \"auto\": true,\r\n \"auto_count\": + 30,\r\n \"auto_min\": \"10s\",\r\n \"current\": {\r\n \"selected\": + false,\r\n \"text\": \"auto\",\r\n \"value\": \"$__auto_interval_interval\"\r\n + \ },\r\n \"description\": null,\r\n \"error\": null,\r\n + \ \"hide\": 0,\r\n \"label\": \"Interval\",\r\n \"name\": + \"interval\",\r\n \"options\": [\r\n {\r\n \"selected\": + true,\r\n \"text\": \"auto\",\r\n \"value\": \"$__auto_interval_interval\"\r\n + \ },\r\n {\r\n \"selected\": false,\r\n \"text\": + \"1m\",\r\n \"value\": \"1m\"\r\n },\r\n {\r\n + \ \"selected\": false,\r\n \"text\": \"10m\",\r\n \"value\": + \"10m\"\r\n },\r\n {\r\n \"selected\": false,\r\n + \ \"text\": \"30m\",\r\n \"value\": \"30m\"\r\n },\r\n + \ {\r\n \"selected\": false,\r\n \"text\": + \"1h\",\r\n \"value\": \"1h\"\r\n },\r\n {\r\n + \ \"selected\": false,\r\n \"text\": \"6h\",\r\n \"value\": + \"6h\"\r\n },\r\n {\r\n \"selected\": false,\r\n + \ \"text\": \"12h\",\r\n \"value\": \"12h\"\r\n },\r\n + \ {\r\n \"selected\": false,\r\n \"text\": + \"1d\",\r\n \"value\": \"1d\"\r\n },\r\n {\r\n + \ \"selected\": false,\r\n \"text\": \"7d\",\r\n \"value\": + \"7d\"\r\n },\r\n {\r\n \"selected\": false,\r\n + \ \"text\": \"14d\",\r\n \"value\": \"14d\"\r\n },\r\n + \ {\r\n \"selected\": false,\r\n \"text\": + \"30d\",\r\n \"value\": \"30d\"\r\n }\r\n ],\r\n + \ \"query\": \"1m,10m,30m,1h,6h,12h,1d,7d,14d,30d\",\r\n \"refresh\": + 2,\r\n \"skipUrlSync\": false,\r\n \"type\": \"interval\"\r\n + \ }\r\n ]\r\n },\r\n \"time\": {\r\n \"from\": \"now-3h\",\r\n + 
\ \"to\": \"now\"\r\n },\r\n \"timepicker\": {\r\n \"refresh_intervals\": + [\r\n \"5s\",\r\n \"10s\",\r\n \"30s\",\r\n \"1m\",\r\n + \ \"5m\",\r\n \"15m\",\r\n \"30m\",\r\n \"1h\",\r\n + \ \"2h\",\r\n \"1d\"\r\n ],\r\n \"time_options\": [\r\n + \ \"5m\",\r\n \"15m\",\r\n \"1h\",\r\n \"6h\",\r\n + \ \"12h\",\r\n \"24h\",\r\n \"2d\",\r\n \"7d\",\r\n + \ \"30d\"\r\n ]\r\n },\r\n \"timezone\": \"\",\r\n \"title\": + \"Traefik 2\",\r\n \"uid\": \"3ipsWfViz\",\r\n \"version\": 2\r\n }" + diff --git a/src/terraform/modules/traefik/dashboard-ingress.yaml b/src/terraform/modules/traefik/dashboard-ingress.yaml new file mode 100644 index 00000000..5eef147d --- /dev/null +++ b/src/terraform/modules/traefik/dashboard-ingress.yaml @@ -0,0 +1,16 @@ +apiVersion: traefik.containo.us/v1alpha1 +kind: IngressRoute +metadata: + name: dashboard + namespace: ${namespace} +spec: + entryPoints: + - websecure + routes: + - match: Host(`traefik.${namespace}.${domain-name}`) && (PathPrefix(`/dashboard`) || PathPrefix(`/api`)) + kind: Rule + services: + - name: api@internal + kind: TraefikService + tls: + secretName: traefik-cert diff --git a/src/terraform/modules/traefik/http-redirect-middleware.yaml b/src/terraform/modules/traefik/http-redirect-middleware.yaml new file mode 100644 index 00000000..3e65399a --- /dev/null +++ b/src/terraform/modules/traefik/http-redirect-middleware.yaml @@ -0,0 +1,7 @@ +apiVersion: traefik.containo.us/v1alpha1 +kind: Middleware +metadata: + name: redirect-http +spec: + redirectScheme: + scheme: https \ No newline at end of file diff --git a/src/terraform/modules/traefik/main.tf b/src/terraform/modules/traefik/main.tf new file mode 100644 index 00000000..16488bdb --- /dev/null +++ b/src/terraform/modules/traefik/main.tf @@ -0,0 +1,77 @@ +resource "helm_release" "traefik" { + name = "traefik" + + repository = "https://helm.traefik.io/traefik" + chart = "traefik" + version = "9.19.1" + namespace = var.namespace + wait = true + wait_for_jobs = true + values = [ 
+ templatefile("${path.module}/traefik-values.yaml", { + namespace = var.namespace, + domain-name = var.domain-name, + traefik-domain-name = var.traefik-domain-name, + service-ip = var.service-ip + }) + ] +} + +resource "kubectl_manifest" "tls-store" { + depends_on = [ + helm_release.traefik, + ] + yaml_body = templatefile("${path.module}/cert-store.yaml", { + namespace = var.namespace, + domain-name = var.domain-name + }) +} + +resource "kubectl_manifest" "http-redirect-middleware" { + depends_on = [ + helm_release.traefik, + ] + yaml_body = templatefile("${path.module}/http-redirect-middleware.yaml", { + namespace = var.namespace, + domain-name = var.domain-name + }) +} + +resource "kubectl_manifest" "traefik-cert" { + yaml_body = templatefile("${path.module}/traefik-cert.yaml", { + namespace = var.namespace, + domain-name = var.domain-name + }) +} + +resource "kubectl_manifest" "dashboard-ingress" { + depends_on = [ + helm_release.traefik, + ] + yaml_body = templatefile("${path.module}/dashboard-ingress.yaml", { + namespace = var.namespace, + domain-name = var.domain-name + }) +} + +resource "kubectl_manifest" "dashboard-service" { + depends_on = [ + helm_release.traefik, + ] + yaml_body = templatefile("${path.module}/crds/traefik-dashboard-service.yaml", { + namespace = var.namespace + }) +} + +resource "kubectl_manifest" "traefik-grafana-dashboard" { + depends_on = [ + helm_release.traefik, + ] + yaml_body = templatefile("${path.module}/crds/traefik-grafana-dashboard.yaml", { + namespace = var.namespace + }) +} + +output "traefik-url" { + value = "https://${var.traefik-domain-name}" +} \ No newline at end of file diff --git a/src/terraform/modules/traefik/providers.tf b/src/terraform/modules/traefik/providers.tf new file mode 100644 index 00000000..99459e5a --- /dev/null +++ b/src/terraform/modules/traefik/providers.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 0.13" + required_providers { + kubectl = { + source = "gavinbunney/kubectl" + 
version = ">= 1.7.0" + } + } +} diff --git a/src/terraform/modules/traefik/traefik-cert.yaml b/src/terraform/modules/traefik/traefik-cert.yaml new file mode 100644 index 00000000..d9e98c97 --- /dev/null +++ b/src/terraform/modules/traefik/traefik-cert.yaml @@ -0,0 +1,16 @@ +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: traefik-cert + namespace: ${namespace} +spec: + secretName: traefik-cert + issuerRef: + name: selfsigned-ca-issuer + kind: ClusterIssuer + commonName: traefik-cert + dnsNames: + - ${domain-name} + - '*.${domain-name}' + - '*.${namespace}.${domain-name}' + - '*.login.${domain-name}' diff --git a/src/terraform/modules/traefik/traefik-values.yaml b/src/terraform/modules/traefik/traefik-values.yaml new file mode 100644 index 00000000..ce4bfb44 --- /dev/null +++ b/src/terraform/modules/traefik/traefik-values.yaml @@ -0,0 +1,78 @@ +dashboard: + enabled: true + domain: ${traefik-domain-name} +deployment: + podAnnotations: + "consul.hashicorp.com/connect-inject": "true" + "consul.hashicorp.com/connect-service-port": "8000" + "consul.hashicorp.com/transparent-proxy": "true" +additionalArguments: + - "--metrics.prometheus=true" + - "--tracing.jaeger=true" + - "--tracing.jaeger.samplingServerURL=http://jaeger-agent.${namespace}.svc:5778/sampling" + - "--tracing.jaeger.localAgentHostPort=jaeger-agent.${namespace}.svc:6831" + - "--serversTransport.insecureSkipVerify=true" + - "--providers.kubernetesingress=true" + +logs: + access: + enabled: true + format: json + general: + format: json + level: INFO + +kubernetes: + namespaces: + - default + - kube-system + +ports: + websecure: + tls: + enabled: true + +volumes: + - name: traefik-cert + mountPath: "/certs" + type: secret + +ingressClass: +# # true is not unit-testable yet, pending https://github.com/rancher/helm-unittest/pull/12 + enabled: true + isDefaultClass: true +# # Use to force a networking.k8s.io API Version for certain CI/CD applications. E.g. 
"v1beta1" +# # fallbackApiVersion: + +ingressRoute: + dashboard: + enabled: true + annotations: + traefik.ingress.kubernetes.io/router.entrypoints: web, websecure + traefik.ingress.kubernetes.io/router.middlewares: default-redirect-http@kubernetescrd + +service: + enabled: true + spec: + externalTrafficPolicy: Cluster + externalIPs: [ + "${service-ip}" + ] + +tls: + certificates: + - certFile: /certs/tls.crt + keyFile: /certs/tls.key + stores: + default: + defaultCertificate: + certFile: /certs/tls.crt + keyFile: /certs/tls.key + +# consul: +# enabled: true +# endpoint: "consul-consul-server:8500" +# datacenter: datacenter1 +# watch: true +# exposedByDefault: true +# prefix: traefik diff --git a/src/terraform/modules/traefik/variables.tf b/src/terraform/modules/traefik/variables.tf new file mode 100644 index 00000000..22e0d378 --- /dev/null +++ b/src/terraform/modules/traefik/variables.tf @@ -0,0 +1,15 @@ +variable "domain-name" { + type = string +} + +variable "namespace" { + type = string +} + +variable "traefik-domain-name" { + type = string +} + +variable "service-ip" { + type = string +} diff --git a/src/terraform/providers.tf b/src/terraform/providers.tf new file mode 100644 index 00000000..6ee0fba5 --- /dev/null +++ b/src/terraform/providers.tf @@ -0,0 +1,39 @@ +terraform { + required_version = ">= 0.13" + required_providers { + kubernetes-alpha = { + source = "hashicorp/kubernetes-alpha" + version = "0.4.1" + } + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.7.0" + } + time = { + source = "hashicorp/time" + version = ">= 0.7.1" + } + } +} + +provider "kubectl" { + config_path = "~/.kube/config" + config_context = var.cluster-context-name +} + +provider "kubernetes" { + config_path = "~/.kube/config" + config_context = var.cluster-context-name +} + +provider "kubernetes-alpha" { + config_path = "~/.kube/config" + config_context = var.cluster-context-name +} + +provider "helm" { + kubernetes { + config_path = "~/.kube/config" + 
config_context = var.cluster-context-name + } +} \ No newline at end of file diff --git a/src/terraform/terraform.tfvars b/src/terraform/terraform.tfvars new file mode 100644 index 00000000..18244fb7 --- /dev/null +++ b/src/terraform/terraform.tfvars @@ -0,0 +1,14 @@ +cluster-context-name = "kind-devinfra" +namespace = "infrastructure" +domain-name = "k8s.local" +consul-datacenter = "datacenter1" +service-ip = "172.18.0.2" + +install_cert_manager = true +install_consul = true +install_elasticsearch = true +install_identityserver4admin = true +install_jaeger = true +install_loki = true +install_prometheus = true +install_traefik = true diff --git a/src/terraform/variables.tf b/src/terraform/variables.tf new file mode 100644 index 00000000..eaed2b30 --- /dev/null +++ b/src/terraform/variables.tf @@ -0,0 +1,64 @@ +variable "cluster-context-name" { + type = string +} + +variable "domain-name" { + type = string +} + +variable "namespace" { + type = string +} + +variable "consul-datacenter" { + type = string +} + +variable "service-ip" { + type = string +} + +variable "install_consul" { + type = bool + default = true +} + +variable "install_cert_manager" { + type = bool + default = false +} + +variable "patch_coredns" { + type = bool + default = true +} + +variable "install_traefik" { + type = bool + default = true +} + +variable "install_elasticsearch" { + type = bool + default = true +} + +variable "install_prometheus" { + type = bool + default = true +} + +variable "install_jaeger" { + type = bool + default = true +} + +variable "install_loki" { + type = bool + default = true +} + +variable "install_identityserver4admin" { + type = bool + default = true +} \ No newline at end of file