open-telemetry · dmitryax · Jun 19, 2024 · Jun 18, 2024 · Jun 18, 2024 · Jun 18, 2024
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern (e.g. filelogreceiver)
+component: clickhouseexporter
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Updated the default logs table to a more optimized schema
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [33611]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext: Simplified data types, improved partitioning and time range queries.
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: []
@@ -43,7 +43,7 @@ as [ClickHouse document says:](https://clickhouse.com/docs/en/introduction/perfo
 - Get log severity count time series.
 
 ```clickhouse
-SELECT toDateTime(toStartOfInterval(Timestamp, INTERVAL 60 second)) as time, SeverityText, count() as count
+SELECT toDateTime(toStartOfInterval(TimestampTime, INTERVAL 60 second)) as time, SeverityText, count() as count
 FROM otel_logs
 WHERE time >= NOW() - INTERVAL 1 HOUR
 GROUP BY SeverityText, time
@@ -55,7 +55,7 @@ ORDER BY time;
 ```clickhouse
 SELECT Timestamp as log_time, Body
 FROM otel_logs
-WHERE Timestamp >= NOW() - INTERVAL 1 HOUR
+WHERE TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
 
@@ -65,7 +65,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE ServiceName = 'clickhouse-exporter'
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
 
@@ -75,7 +75,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE LogAttributes['container_name'] = '/example_flog_1'
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
 
@@ -85,7 +85,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE hasToken(Body, 'http')
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
 
@@ -95,7 +95,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE Body like '%http%'
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
 
@@ -105,7 +105,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE match(Body, 'http')
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
 
@@ -115,7 +115,7 @@ Limit 100;
 SELECT Timestamp as log_time, Body
 FROM otel_logs
 WHERE JSONExtractFloat(Body, 'bytes') > 1000
-  AND Timestamp >= NOW() - INTERVAL 1 HOUR
+  AND TimestampTime >= NOW() - INTERVAL 1 HOUR
 Limit 100;
 ```
 

@@ -1,21 +1,24 @@
 -- Default Logs table DDL
 
 CREATE TABLE IF NOT EXISTS otel_logs (
-	Timestamp DateTime64(9) CODEC(Delta, ZSTD(1)),
+	Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)),
+	TimestampDate Date DEFAULT toDate(Timestamp),
+	TimestampTime DateTime DEFAULT toDateTime(Timestamp),
 	TraceId String CODEC(ZSTD(1)),
 	SpanId String CODEC(ZSTD(1)),
-	TraceFlags UInt32 CODEC(ZSTD(1)),
+	TraceFlags UInt8,
 	SeverityText LowCardinality(String) CODEC(ZSTD(1)),
-	SeverityNumber Int32 CODEC(ZSTD(1)),
+	SeverityNumber UInt8,
 	ServiceName LowCardinality(String) CODEC(ZSTD(1)),
 	Body String CODEC(ZSTD(1)),
-	ResourceSchemaUrl String CODEC(ZSTD(1)),
+	ResourceSchemaUrl LowCardinality(String) CODEC(ZSTD(1)),
 	ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
-	ScopeSchemaUrl String CODEC(ZSTD(1)),
+	ScopeSchemaUrl LowCardinality(String) CODEC(ZSTD(1)),
 	ScopeName String CODEC(ZSTD(1)),
-	ScopeVersion String CODEC(ZSTD(1)),
+	ScopeVersion LowCardinality(String) CODEC(ZSTD(1)),
 	ScopeAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
 	LogAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
+
 	INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
 	INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
 	INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
@@ -25,7 +28,7 @@ CREATE TABLE IF NOT EXISTS otel_logs (
 	INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
 	INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1
 ) ENGINE = MergeTree()
-TTL toDateTime("Timestamp") + toIntervalDay(180)
-PARTITION BY toDate(Timestamp)
-ORDER BY (ServiceName, SeverityText, toUnixTimestamp(Timestamp), TraceId)
-SETTINGS index_granularity=8192, ttl_only_drop_parts = 1;
+PARTITION BY toYYYYMM(TimestampDate)
+ORDER BY (TimestampDate, TimestampTime)
+TTL TimestampTime + toIntervalDay(180)
+SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
@@ -133,34 +133,37 @@ const (
 	// language=ClickHouse SQL
 	createLogsTableSQL = `
 CREATE TABLE IF NOT EXISTS %s %s (
-     Timestamp DateTime64(9) CODEC(Delta, ZSTD(1)),
-     TraceId String CODEC(ZSTD(1)),
-     SpanId String CODEC(ZSTD(1)),
-     TraceFlags UInt32 CODEC(ZSTD(1)),
-     SeverityText LowCardinality(String) CODEC(ZSTD(1)),
-     SeverityNumber Int32 CODEC(ZSTD(1)),
-     ServiceName LowCardinality(String) CODEC(ZSTD(1)),
-     Body String CODEC(ZSTD(1)),
-     ResourceSchemaUrl String CODEC(ZSTD(1)),
-     ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
-     ScopeSchemaUrl String CODEC(ZSTD(1)),
-     ScopeName String CODEC(ZSTD(1)),
-     ScopeVersion String CODEC(ZSTD(1)),
-     ScopeAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
-     LogAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
-     INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
-     INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-     INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-     INDEX idx_scope_attr_key mapKeys(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-     INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-     INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-     INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
-     INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1
+	Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)),
+	TimestampDate Date DEFAULT toDate(Timestamp),
+	TimestampTime DateTime DEFAULT toDateTime(Timestamp),
+	TraceId String CODEC(ZSTD(1)),
+	SpanId String CODEC(ZSTD(1)),
+	TraceFlags UInt8,
+	SeverityText LowCardinality(String) CODEC(ZSTD(1)),
+	SeverityNumber UInt8,
+	ServiceName LowCardinality(String) CODEC(ZSTD(1)),
+	Body String CODEC(ZSTD(1)),
+	ResourceSchemaUrl LowCardinality(String) CODEC(ZSTD(1)),
+	ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
+	ScopeSchemaUrl LowCardinality(String) CODEC(ZSTD(1)),
+	ScopeName String CODEC(ZSTD(1)),
+	ScopeVersion LowCardinality(String) CODEC(ZSTD(1)),
+	ScopeAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
+	LogAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
+
+	INDEX idx_trace_id TraceId TYPE bloom_filter(0.001) GRANULARITY 1,
+	INDEX idx_res_attr_key mapKeys(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_res_attr_value mapValues(ResourceAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_scope_attr_key mapKeys(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1,
+	INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1
 ) ENGINE = %s
+PARTITION BY toYYYYMM(TimestampDate)
+ORDER BY (TimestampDate, TimestampTime)
 %s
-PARTITION BY toDate(Timestamp)
-ORDER BY (ServiceName, SeverityText, toUnixTimestamp(Timestamp), TraceId)
-SETTINGS index_granularity=8192, ttl_only_drop_parts = 1;
+SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1;
 `
 	// language=ClickHouse SQL
 	insertLogsSQLTemplate = `INSERT INTO %s (
@@ -238,7 +241,7 @@ func createLogsTable(ctx context.Context, cfg *Config, db *sql.DB) error {
 }
 
 func renderCreateLogsTableSQL(cfg *Config) string {
-	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "Timestamp")
+	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "TimestampTime")
 	return fmt.Sprintf(createLogsTableSQL, cfg.LogsTableName, cfg.ClusterString(), cfg.TableEngineString(), ttlExpr)
 }
 

@@ -47,7 +47,7 @@ func (e *metricsExporter) start(ctx context.Context, _ component.Host) error {
 		return err
 	}
 
-	ttlExpr := generateTTLExpr(e.cfg.TTLDays, e.cfg.TTL, "TimeUnix")
+	ttlExpr := generateTTLExpr(e.cfg.TTLDays, e.cfg.TTL, "toDateTime(TimeUnix)")
 	return internal.NewMetricsTable(ctx, e.cfg.MetricsTableName, e.cfg.ClusterString(), e.cfg.TableEngineString(), ttlExpr, e.client)
 }
 

@@ -295,12 +295,12 @@ func renderInsertTracesSQL(cfg *Config) string {
 }
 
 func renderCreateTracesTableSQL(cfg *Config) string {
-	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "Timestamp")
+	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "toDateTime(Timestamp)")
 	return fmt.Sprintf(createTracesTableSQL, cfg.TracesTableName, cfg.ClusterString(), cfg.TableEngineString(), ttlExpr)
 }
 
 func renderCreateTraceIDTsTableSQL(cfg *Config) string {
-	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "Start")
+	ttlExpr := generateTTLExpr(cfg.TTLDays, cfg.TTL, "toDateTime(Start)")
 	return fmt.Sprintf(createTraceIDTsTableSQL, cfg.TracesTableName, cfg.ClusterString(), cfg.TableEngineString(), ttlExpr)
 }
 

@@ -126,19 +126,19 @@ func createMetricExporter(
 
 func generateTTLExpr(ttlDays uint, ttl time.Duration, timeField string) string {
 	if ttlDays > 0 {
-		return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalDay(%d)`, timeField, ttlDays)
+		return fmt.Sprintf(`TTL %s + toIntervalDay(%d)`, timeField, ttlDays)
 	}
 
 	if ttl > 0 {
 		switch {
 		case ttl%(24*time.Hour) == 0:
-			return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalDay(%d)`, timeField, ttl/(24*time.Hour))
+			return fmt.Sprintf(`TTL %s + toIntervalDay(%d)`, timeField, ttl/(24*time.Hour))
 		case ttl%(time.Hour) == 0:
-			return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalHour(%d)`, timeField, ttl/time.Hour)
+			return fmt.Sprintf(`TTL %s + toIntervalHour(%d)`, timeField, ttl/time.Hour)
 		case ttl%(time.Minute) == 0:
-			return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalMinute(%d)`, timeField, ttl/time.Minute)
+			return fmt.Sprintf(`TTL %s + toIntervalMinute(%d)`, timeField, ttl/time.Minute)
 		default:
-			return fmt.Sprintf(`TTL toDateTime(%s) + toIntervalSecond(%d)`, timeField, ttl/time.Second)
+			return fmt.Sprintf(`TTL %s + toIntervalSecond(%d)`, timeField, ttl/time.Second)
 		}
 	}
 	return ""

@@ -95,6 +95,8 @@ func verifyExportLog(t *testing.T, logExporter *logsExporter) {
 
 	type log struct {
 		Timestamp          string            `db:"Timestamp"`
+		TimestampDate      string            `db:"TimestampDate"`
+		TimestampTime      string            `db:"TimestampTime"`
 		TraceID            string            `db:"TraceId"`
 		SpanID             string            `db:"SpanId"`
 		TraceFlags         uint32            `db:"TraceFlags"`
@@ -115,6 +117,8 @@ func verifyExportLog(t *testing.T, logExporter *logsExporter) {
 
 	expectLog := log{
 		Timestamp:         "2023-12-25T09:53:49Z",
+		TimestampDate:     "2023-12-25T00:00:00Z",
+		TimestampTime:     "2023-12-25T09:53:49Z",
 		TraceID:           "01020300000000000000000000000000",
 		SpanID:            "0102030000000000",
 		SeverityText:      "error",