From d7d31b8857adb6e3de3c7bfe4506976d9cdf2f15 Mon Sep 17 00:00:00 2001
From: Laurens Kuiper
Date: Fri, 23 Aug 2024 12:50:51 +0200
Subject: [PATCH] remove unnecessary optimization

---
 src/storage/statistics/column_statistics.cpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/src/storage/statistics/column_statistics.cpp b/src/storage/statistics/column_statistics.cpp
index b2707c7addcc..627ae1b8e4c0 100644
--- a/src/storage/statistics/column_statistics.cpp
+++ b/src/storage/statistics/column_statistics.cpp
@@ -46,18 +46,9 @@ void ColumnStatistics::SetDistinct(unique_ptr<DistinctStatistics> distinct) {
 }
 
 void ColumnStatistics::UpdateDistinctStatistics(Vector &v, idx_t count) {
-	static constexpr idx_t MAXIMUM_STRING_LENGTH_FOR_DISTINCT = 64;
 	if (!distinct_stats) {
 		return;
 	}
-	if (stats.GetType().InternalType() == PhysicalType::VARCHAR && StringStats::HasMaxStringLength(stats) &&
-	    StringStats::MaxStringLength(stats) > MAXIMUM_STRING_LENGTH_FOR_DISTINCT) {
-		// We start bailing out on distinct statistics if we encounter long strings,
-		// because hashing them for HLL is expensive and they probably won't be used as join keys anyway.
-		// If they are used as join keys, we will still have decent join orders (same method as Parquet)
-		distinct_stats.reset();
-		return;
-	}
 	distinct_stats->Update(v, count);
 }
 