Skip to content

Commit

Permalink
chore: update otel example
Browse files Browse the repository at this point in the history
vmihailenco committed Nov 9, 2022
1 parent 1278a80 commit dd858ea
Showing 8 changed files with 164 additions and 22 deletions.
16 changes: 16 additions & 0 deletions example/otel/README.md
Original file line number Diff line number Diff line change
@@ -40,9 +40,25 @@ UPTRACE_DSN=http://project2_secret_token@localhost:14317/2 go run client.go
trace: http://localhost:14318/traces/ee029d8782242c8ed38b16d961093b35
```

![Redis trace](./image/redis-trace.png)

You can also open Uptrace UI at [http://localhost:14318](http://localhost:14318) to view available
spans, logs, and metrics.

## Redis monitoring

You can also [monitor Redis performance](https://uptrace.dev/opentelemetry/redis-monitoring.html)
metrics by installing OpenTelemetry Collector.

[OpenTelemetry Collector](https://uptrace.dev/opentelemetry/collector.html) is an agent that pulls
telemetry data from systems you want to monitor and sends it to APM tools using the OpenTelemetry
protocol (OTLP).

When telemetry data reaches Uptrace, it automatically generates a Redis dashboard from a pre-defined
template.

![Redis dashboard](./image/metrics.png)

## Links

- [Uptrace open-source APM](https://uptrace.dev/get/open-source-apm.html)
53 changes: 53 additions & 0 deletions example/otel/config/alertmanager.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# See https://prometheus.io/docs/alerting/latest/configuration/ for details.

global:
# The smarthost and SMTP sender used for mail notifications.
smtp_smarthost: 'mailhog:1025'
smtp_from: 'alertmanager@example.com'
smtp_require_tls: false

receivers:
- name: 'team-X'
email_configs:
- to: 'some-receiver@example.com'
send_resolved: true

# The root route on which each incoming alert enters.
route:
# The labels by which incoming alerts are grouped together. For example,
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
# be batched into a single group.
group_by: ['alertname', 'cluster', 'service']

# When a new group of alerts is created by an incoming alert, wait at
# least 'group_wait' to send the initial notification.
# This ensures that multiple alerts for the same group that start
# firing shortly after one another are batched together in the first
# notification.
group_wait: 30s

# When the first notification was sent, wait 'group_interval' to send a batch
# of new alerts that started firing for that group.
group_interval: 5m

# If an alert has already been sent successfully, wait 'repeat_interval'
# before resending it.
repeat_interval: 3h

# A default receiver
receiver: team-X

# All the above attributes are inherited by all child routes and can be
# overwritten on each.

# The child route trees.
routes:
# This route matches error alerts created from spans or logs.
- matchers:
- alert_kind="error"
group_interval: 24h
receiver: team-X

# The directory from which notification templates are read.
templates:
- '/etc/alertmanager/template/*.tmpl'
File renamed without changes.
File renamed without changes.
27 changes: 21 additions & 6 deletions example/otel/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@ services:
- '9000:9000'

uptrace:
image: 'uptrace/uptrace:1.1.0'
image: 'uptrace/uptrace:1.2.0'
#image: 'uptrace/uptrace-dev:latest'
restart: on-failure
volumes:
@@ -36,19 +36,34 @@ services:
otel-collector:
image: otel/opentelemetry-collector-contrib:0.58.0
restart: on-failure
user: '0:0' # required for logs
volumes:
- ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /var/log:/var/log:ro
- ./config/otel-collector.yaml:/etc/otelcol-contrib/config.yaml
ports:
- '4317:4317'
- '4318:4318'

vector:
image: timberio/vector:0.24.X-alpine
volumes:
- ./vector.toml:/etc/vector/vector.toml:ro
- ./config/vector.toml:/etc/vector/vector.toml:ro

alertmanager:
image: prom/alertmanager:v0.24.0
restart: on-failure
volumes:
- ./config/alertmanager.yml:/etc/alertmanager/config.yml
- alertmanager_data:/alertmanager
ports:
- 9093:9093
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--storage.path=/alertmanager'

mailhog:
image: mailhog/mailhog:v1.0.1
restart: on-failure
ports:
- '8025:8025'

redis-server:
image: redis
Binary file added example/otel/image/metrics.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added example/otel/image/redis-trace.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
90 changes: 74 additions & 16 deletions example/otel/uptrace.yml
Original file line number Diff line number Diff line change
@@ -13,6 +13,16 @@
## foo: $$FOO_BAR
##

##
## ClickHouse database credentials.
##
ch:
# Connection string for ClickHouse database. For example:
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
#
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'

##
## A list of pre-configured projects. Each project is fully isolated.
##
@@ -26,6 +36,10 @@ projects:
- service.name
- host.name
- deployment.environment
# Group spans by deployment.environment attribute.
group_by_env: false
# Group funcs spans by service.name attribute.
group_funcs_by_service: false

# Other projects can be used to monitor your applications.
# To monitor micro-services or multiple related services, use a single project.
@@ -36,6 +50,49 @@ projects:
- service.name
- host.name
- deployment.environment
# Group spans by deployment.environment attribute.
group_by_env: false
# Group funcs spans by service.name attribute.
group_funcs_by_service: false

##
## Create metrics from spans and events.
##
metrics_from_spans:
- name: uptrace.tracing.spans_duration
description: Spans duration (excluding events)
instrument: histogram
unit: microseconds
value: span.duration / 1000
attrs:
- span.system as system
- service.name as service
- host.name as host
- span.status_code as status
where: not span.is_event

- name: uptrace.tracing.spans
description: Spans count (excluding events)
instrument: counter
unit: 1
value: span.count
attrs:
- span.system as system
- service.name as service
- host.name as host
- span.status_code as status
where: not span.is_event

- name: uptrace.tracing.events
description: Events count (excluding spans)
instrument: counter
unit: 1
value: span.count
attrs:
- span.system as system
- service.name as service
- host.name as host
where: span.is_event

##
## To require authentication, uncomment the following section.
@@ -78,16 +135,6 @@ auth:
# # Defaults to 'preferred_username'.
# claim: preferred_username

##
## ClickHouse database credentials.
##
ch:
# Connection string for ClickHouse database. For example:
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
#
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'

##
## Alerting rules for monitoring metrics.
##
@@ -102,8 +149,8 @@ alerting:
- $net_errors > 0 group by host.name
# for the last 5 minutes
for: 5m
# in the project id=1
projects: [1]
annotations:
summary: '{{ $labels.host_name }} has high number of net errors: {{ $values.net_errors }}'

- name: Filesystem usage >= 90%
metrics:
@@ -114,15 +161,26 @@ alerting:
- where device !~ "loop"
- $fs_usage{state="used"} / $fs_usage >= 0.9
for: 5m
projects: [1]
annotations:
summary: '{{ $labels.host_name }} has high FS usage: {{ $values.fs_usage }}'

- name: Uptrace is dropping spans
metrics:
- uptrace.projects.spans as $spans
query:
- $spans{type=dropped} > 0
for: 1m
projects: [1]
annotations:
summary: 'Uptrace has dropped {{ $values.spans }} spans'

- name: Always firing (for fun and testing)
metrics:
- process.runtime.go.goroutines as $goroutines
query:
- $goroutines >= 0 group by host.name
for: 1m
annotations:
summary: '{{ $labels.host_name }} has high number of goroutines: {{ $values.goroutines }}'

# Create alerts from error logs and span events.
create_alerts_from_spans:
@@ -139,8 +197,8 @@ alerting:
##
alertmanager_client:
# AlertManager API endpoints that Uptrace uses to manage alerts.
# urls:
# - 'http://alertmanager:9093/api/v2/alerts'
urls:
- 'http://alertmanager:9093/api/v2/alerts'

##
## Various options to tweak ClickHouse schema.

0 comments on commit dd858ea

Please sign in to comment.