From 44f68619cf7e28c7e5dde95f6d05e845505d6658 Mon Sep 17 00:00:00 2001 From: Spencer Torres Date: Tue, 6 Aug 2024 11:14:33 -0400 Subject: [PATCH] [exporter/clickhouse] Update default logs table schema (2) (#34203) **Description:** The default logs table was previously updated in #33611; I am opening this to start a discussion on further improvements that can be made to the table. Notable changes: - Changed from monthly partitions to daily. With `ttl_only_drop_parts=1`, this will help drop data for TTLs shorter than 1 month (such as when your log retention is only 7 days). - Changed `idx_body` granularity to `8`, which should reduce the index size (especially beneficial for cloud services with separate storage) - Removed `TimestampDate` column - Simplified the primary key to `(ServiceName, TimestampTime)`, dropping `TimestampDate`. The performance difference is negligible, if not an improvement. This also makes queries easier to write: the current version requires that you provide both `TimestampDate` and `TimestampTime` for optimal sorting performance. - Separated and updated `ORDER BY`. It now matches the primary key, with the addition of `Timestamp`, so that nanosecond-precision sorting is preserved by default. Let me know if you have any more suggestions. **Link to tracking Issue:** **Testing:** **Documentation:** --- ...useexporter_update_default_logs_table.yaml | 27 +++++++++++++++++++ .../example/default_ddl/logs.sql | 8 +++--- exporter/clickhouseexporter/exporter_logs.go | 8 +++--- 3 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 .chloggen/clickhouseexporter_update_default_logs_table.yaml diff --git a/.chloggen/clickhouseexporter_update_default_logs_table.yaml b/.chloggen/clickhouseexporter_update_default_logs_table.yaml new file mode 100644 index 000000000000..12c014929cec --- /dev/null +++ b/.chloggen/clickhouseexporter_update_default_logs_table.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. 
+ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: clickhouseexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Updated the default logs table to a more optimized schema + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [34203] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: Improved partitioning and time range queries. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [] diff --git a/exporter/clickhouseexporter/example/default_ddl/logs.sql b/exporter/clickhouseexporter/example/default_ddl/logs.sql index c177f1495d3b..94687073f808 100644 --- a/exporter/clickhouseexporter/example/default_ddl/logs.sql +++ b/exporter/clickhouseexporter/example/default_ddl/logs.sql @@ -2,7 +2,6 @@ CREATE TABLE IF NOT EXISTS otel_logs ( Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)), - TimestampDate Date DEFAULT toDate(Timestamp), TimestampTime DateTime DEFAULT toDateTime(Timestamp), TraceId String CODEC(ZSTD(1)), SpanId String CODEC(ZSTD(1)), @@ -26,9 +25,10 @@ CREATE TABLE IF NOT EXISTS otel_logs ( INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, - INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1 + INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8 ) ENGINE = MergeTree() -PARTITION BY toYYYYMM(TimestampDate) -ORDER BY (ServiceName, TimestampDate, TimestampTime) +PARTITION BY toDate(TimestampTime) +PRIMARY KEY (ServiceName, TimestampTime) +ORDER BY (ServiceName, TimestampTime, Timestamp) TTL TimestampTime + toIntervalDay(180) SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1; diff --git a/exporter/clickhouseexporter/exporter_logs.go b/exporter/clickhouseexporter/exporter_logs.go index 921e468fd1e0..8a8a1f517d01 100644 --- a/exporter/clickhouseexporter/exporter_logs.go +++ b/exporter/clickhouseexporter/exporter_logs.go @@ -134,7 +134,6 @@ const ( createLogsTableSQL = ` CREATE TABLE IF NOT EXISTS %s %s ( Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)), - TimestampDate Date DEFAULT toDate(Timestamp), TimestampTime DateTime DEFAULT toDateTime(Timestamp), TraceId String CODEC(ZSTD(1)), SpanId String CODEC(ZSTD(1)), @@ -158,10 +157,11 @@ CREATE TABLE IF NOT EXISTS %s %s 
( INDEX idx_scope_attr_value mapValues(ScopeAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_log_attr_key mapKeys(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, INDEX idx_log_attr_value mapValues(LogAttributes) TYPE bloom_filter(0.01) GRANULARITY 1, - INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1 + INDEX idx_body Body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 8 ) ENGINE = %s -PARTITION BY toYYYYMM(TimestampDate) -ORDER BY (ServiceName, TimestampDate, TimestampTime) +PARTITION BY toDate(TimestampTime) +PRIMARY KEY (ServiceName, TimestampTime) +ORDER BY (ServiceName, TimestampTime, Timestamp) %s SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1; `