This repo creates:
- Clickhouse 4-node Cluster with the
system.opentelemetry_span_log
table. This is made possible by setting the session variablesopentelemetry_start_trace_probability: 1
, required for that table to be created. - One node Keeper cluster
- Jaeger installation
- Grafana for displaying the traces in a dashboard
Once created, in this cluster will automatically store trace data for queries in system.opentelemtry_span_log
SELECT
lower(hex(trace_id)) AS traceId,
multiIf(parent_span_id = 0, '', lower(hex(parent_span_id))) AS parentId,
lower(hex(span_id)) AS id,
operation_name AS name,
start_time_us AS timestamp,
finish_time_us - start_time_us AS duration,
CAST(tuple('clickhouse'), 'Tuple(serviceName text)') AS localEndpoint,
attribute['clickhouse.thread_num'] -- get value of this map
FROM system.opentelemetry_span_log
Useful view for traces:
CREATE VIEW default.otel_traces_trace_id_ts
(
`TraceId` UUID,
`Start` UInt64,
`End` UInt64
)
AS SELECT
trace_id AS TraceId,
min(start_time_us / 1000 / 1000) AS Start,
max(finish_time_us / 1000 / 1000) AS End
FROM system.opentelemetry_span_log
GROUP BY TraceId;
CREATE VIEW default.otel_traces
(
`TraceId` UUID,
`SpanId` UInt64,
`ParentSpanId` UInt64,
`ServiceName` String,
`SpanName` LowCardinality(String),
`Timestamp` Float64,
`Duration` Float64,
`SpanAttributes` Map(LowCardinality(String), String),
`ResourceAttributes` Map(LowCardinality(String), String)
)
AS SELECT
trace_id AS TraceId,
span_id AS SpanId,
parent_span_id AS ParentSpanId,
'ClickHouse' AS ServiceName,
operation_name AS SpanName,
(start_time_us / 1000) / 1000 AS Timestamp,
((finish_time_us - start_time_us) / 1000) / 1000 AS Duration,
attribute AS SpanAttributes,
mapUpdate(
mapUpdate(
mapUpdate(
attribute,
map('server.address', toString(hostname))
),
map('server.shard', getMacro('shard'))
),
map('server.replica', getMacro('replica'))
) AS ResourceAttributes
FROM system.opentelemetry_span_log
Example usage:
clickhouse-client --opentelemetry-traceparent "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"
Flame Graph Debug Hang Queries Integration with Other Monitoring Systems Maciej's query templates Clickhouse documentation on processors Proccessors header file
CREATE OR REPLACE VIEW default.otel_traces_trace_id_ts ON CLUSTER '{cluster}'
(
`TraceId` UUID,
`Start` UInt64,
`End` UInt64
)
AS SELECT
trace_id AS TraceId,
min(start_time_us / 1000) AS Start,
max(finish_time_us / 1000) AS End
FROM clusterAllReplicas('{cluster}', system.opentelemetry_span_log)
GROUP BY TraceId;
-- Create a view for easier querying of trace data
CREATE OR REPLACE VIEW default.otel_traces ON CLUSTER '{cluster}'
(
`TraceId` UUID,
`SpanId` UInt64,
`ParentSpanId` UInt64,
`ServiceName` String,
`SpanName` LowCardinality(String),
`Timestamp` Float64,
`Duration` Float64,
`SpanAttributes` Map(LowCardinality(String), String),
`ResourceAttributes` Map(LowCardinality(String), String),
`Hostname` String,
`Shard` String,
`Replica` String
)
AS SELECT
trace_id AS TraceId,
span_id AS SpanId,
parent_span_id AS ParentSpanId,
'ClickHouse' AS ServiceName,
operation_name AS SpanName,
(start_time_us / 1000) AS Timestamp,
((finish_time_us - start_time_us) / 1000) AS Duration,
attribute AS SpanAttributes,
mapUpdate(
attribute,
map('server.address', toString(hostname))
) AS ResourceAttributes,
hostname AS Hostname
FROM clusterAllReplicas('{cluster}', system.opentelemetry_span_log);