Skip to content

Commit b96254c

Browse files
committed
fix(config): Refine Grafana Alloy configuration for improved logging and metrics collection
1 parent f5260b3 commit b96254c

File tree

1 file changed

+45
-49
lines changed

1 file changed

+45
-49
lines changed

apps/clusters/prod/apps/alloy/config.alloy

Lines changed: 45 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,33 @@
1-
// === Generated from your template (kept your remote_write URL) ===
2-
// Goal: scrape Prometheus metrics and collect Kubernetes logs & events across ALL namespaces.
3-
// Notes:
4-
// - Uses discovery.kubernetes without namespace filters (default = all namespaces) — see docs.
5-
// - For metrics, only Pods/Services annotated with `prometheus.io/scrape: "true"` are scraped.
6-
// You can override path/port via `prometheus.io/path` and `prometheus.io/port` annotations.
7-
// - For logs, loki.source.kubernetes tails container logs over the Kubernetes API (no DaemonSet needed).
8-
// - Set environment variables in your Deployment for LOKI_URL (and optionally PROM_REMOTE_WRITE_URL).
1+
// === Grafana Alloy — final config ===
2+
// Cluster-wide metrics & logs (all namespaces).
3+
// - Prometheus metrics: opt-in via standard annotations.
4+
// - Logs: Kubernetes Pod logs + Kubernetes Events.
5+
// - Loki write endpoint: baked in (your gateway).
6+
// - Ingress-NGINX helper: parses JSON logs and sets a few low-cardinality labels.
97
//
10-
// Original snippet for reference:
11-
// discovery.kubernetes "pods" {
12-
// role = "pod"
13-
// /*namespaces {
14-
// own_namespace = true
15-
// names = ["grafana"]
16-
// }*/
17-
// }
18-
// discovery.kubernetes "services" {
19-
// role = "service"
20-
// }
21-
//
22-
// prometheus.scrape "pods" {
23-
// targets = discovery.kubernetes.pods.targets
24-
// forward_to = [prometheus.remote_write.default.receiver]
25-
// }
26-
// prometheus.scrape "services" {
27-
// targets = discovery.kubernetes.services.targets
28-
// forward_to = [prometheus.remote_write.default.receiver]
29-
// }
30-
//
31-
// prometheus.remote_write "default" {
32-
// endpoint {
33-
// url = "http://mimir-nginx.grafana.svc.cluster.local/api/v1/push"
34-
// }
35-
// }
36-
8+
// Ops tips:
9+
// * Keep labels conservative to avoid high cardinality in Loki/Prometheus.
10+
// * The Prometheus remote_write endpoint can be overridden via PROM_REMOTE_WRITE_URL; the Loki endpoint is hard-coded below.
11+
//
12+
// ----- Global logging for Alloy itself -----
3713
logging {
3814
level = coalesce(sys.env("ALLOY_LOG_LEVEL"), "info") // sys.env returns "" when unset; fall back to "info" so the level is always valid
3915
format = "logfmt"
4016
}
4117

42-
// -----------------------------
18+
// =============================
4319
// Kubernetes discovery (all ns)
44-
// -----------------------------
20+
// =============================
4521
discovery.kubernetes "pods" {
4622
role = "pod"
4723
}
4824
discovery.kubernetes "endpoints" {
4925
role = "endpoints"
5026
}
5127

52-
// -----------------------------
28+
// =============================
5329
// METRICS (Prometheus) pipeline
54-
// -----------------------------
30+
// =============================
5531

5632
// Pods: respect standard Prometheus annotations
5733
discovery.relabel "pods_metrics" {
@@ -85,8 +61,8 @@ discovery.relabel "pods_metrics" {
8561
rule { action = "labelmap" regex = "__meta_kubernetes_pod_label_(.+)" }
8662

8763
// common labels
88-
rule { action = "replace" source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" }
89-
rule { action = "replace" source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" }
64+
rule { action = "replace" source_labels = ["__meta_kubernetes_namespace"] target_label = "namespace" }
65+
rule { action = "replace" source_labels = ["__meta_kubernetes_pod_name"] target_label = "pod" }
9066
rule { action = "replace" source_labels = ["__meta_kubernetes_pod_container_name"] target_label = "container" }
9167
}
9268

@@ -136,25 +112,26 @@ prometheus.scrape "services" {
136112
forward_to = [prometheus.remote_write.default.receiver]
137113
}
138114

139-
// Remote write (kept your original URL; you can also set PROM_REMOTE_WRITE_URL to override)
115+
// Remote write: override with PROM_REMOTE_WRITE_URL if needed
140116
prometheus.remote_write "default" {
141117
external_labels = {
142118
"cluster" = sys.env("CLUSTER"), // optional
143-
"__replica__" = sys.env("KUBE_POD_NAME"), // good for HA fanout
119+
"__replica__" = sys.env("KUBE_POD_NAME"), // good for HA fanout (set via Downward API)
144120
}
145121
endpoint {
146122
url = coalesce(sys.env("PROM_REMOTE_WRITE_URL"), "http://mimir-nginx.grafana.svc.cluster.local/api/v1/push")
147123
// headers = { "X-Scope-OrgID" = sys.env("PROM_TENANT") } // Grafana Cloud / Mimir multi-tenant
148124
}
149125
}
150126

151-
// -----------------------------
127+
// =============================
152128
// LOGS (Loki) pipeline (all ns)
153-
// -----------------------------
129+
// =============================
154130

131+
// Loki write endpoint — hard-coded to your gateway
155132
loki.write "default" {
156133
endpoint {
157-
url = sys.env("LOKI_URL") // e.g. http://loki-gateway.grafana.svc.cluster.local/loki/api/v1/push
134+
url = "http://loki-gateway.grafana.svc.cluster.local:80/loki/api/v1/push"
158135
// tenant_id = sys.env("LOKI_TENANT_ID")
159136
// basic_auth { username = sys.env("LOKI_USERNAME"); password = sys.env("LOKI_PASSWORD") }
160137
}
@@ -166,11 +143,30 @@ loki.source.kubernetes "pods" {
166143
forward_to = [loki.process.k8s_logs.receiver]
167144
}
168145

169-
// Enrich with Kubernetes metadata; drop nothing by default
146+
// Enrich with Kubernetes metadata; add targeted parsing for ingress-nginx JSON logs
170147
loki.process "k8s_logs" {
171-
stage.kubernetes {} // add pod, namespace, labels, etc.
172-
// Example filters (uncomment if needed):
148+
// Always enrich with Kubernetes metadata (namespace, pod, container, labels)
149+
// NOTE(review): loki.process has no "kubernetes" stage, so `stage.kubernetes {}` was removed (it prevents the config from loading).
// Labels such as namespace/pod/container must be set by relabeling the targets passed to loki.source.kubernetes — confirm that is done upstream.
150+
151+
// --- Focused branch for ingress-nginx controller logs in JSON format ---
152+
stage.match {
153+
// Inner quotes must be escaped: the selector is a LogQL stream selector embedded in an Alloy string.
selector = "{namespace=\"ingress-nginx\", container=\"controller\"}"
154+
// Try to parse JSON access logs (as configured in your Helm values)
155+
stage.json {
  // `expressions` is required: each key becomes an extracted value, each value is a JMESPath expression.
  // Only the fields consumed by the following stage.labels are extracted.
  expressions = {
    status = "status",
    method = "method",
    vhost  = "vhost",
  }
}
156+
157+
// Add a few stable labels for better queries (avoid high-cardinality labels like path/ip/user-agent)
158+
stage.labels {
159+
values = {
160+
status = "status",
161+
method = "method",
162+
host = "vhost",
163+
}
164+
}
165+
}
166+
167+
// Example global filters (disabled by default). Uncomment if too chatty:
173168
// stage.drop { expression = "/(healthz|readyz|livez)" } // `expression` is an RE2 regex matched against the log line
169+
174170
forward_to = [loki.write.default.receiver]
175171
}
176172

0 commit comments

Comments
 (0)