From 7467bfc3fe83eea20adb9cc384e73efd4f68fd8b Mon Sep 17 00:00:00 2001 From: stdpain <34912776+stdpain@users.noreply.github.com> Date: Wed, 30 Aug 2023 21:13:35 +0800 Subject: [PATCH] [BugFix] Fix global dict late materialize optimize ignore delete conditions (#30156) Signed-off-by: stdpain --- .../storage/rowset/dictcode_column_iterator.h | 1 + .../R/test_delete_conditions | 139 ++++++++++++++++++ .../T/test_delete_conditions | 55 +++++++ 3 files changed, 195 insertions(+) create mode 100644 test/sql/test_low_cardinality/R/test_delete_conditions create mode 100644 test/sql/test_low_cardinality/T/test_delete_conditions diff --git a/be/src/storage/rowset/dictcode_column_iterator.h b/be/src/storage/rowset/dictcode_column_iterator.h index c76d4993f43eff..e657af456f55b6 100644 --- a/be/src/storage/rowset/dictcode_column_iterator.h +++ b/be/src/storage/rowset/dictcode_column_iterator.h @@ -113,6 +113,7 @@ class GlobalDictCodeColumnIterator final : public ColumnIterator { RETURN_IF_ERROR(_col_iter->fetch_dict_codes_by_rowid(rowids, size, _local_dict_code_col.get())); RETURN_IF_ERROR(decode_dict_codes(*_local_dict_code_col, values)); _swap_null_columns(_local_dict_code_col.get(), values); + values->set_delete_state(_local_dict_code_col->delete_state()); return Status::OK(); } diff --git a/test/sql/test_low_cardinality/R/test_delete_conditions b/test/sql/test_low_cardinality/R/test_delete_conditions new file mode 100644 index 00000000000000..e5fc5f10a37c6f --- /dev/null +++ b/test/sql/test_low_cardinality/R/test_delete_conditions @@ -0,0 +1,139 @@ +-- name: test_delete_condition +create table t0 ( + c0 string, + c1 string, + c2 int, + c3 int +) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 1 PROPERTIES('replication_num' = '1'); +-- result: +-- !result +insert into t0 SELECT generate_series%100, generate_series%100, generate_series%100, generate_series%100 FROM TABLE(generate_series(1, 65535)); +-- result: +-- !result +insert into t0 values (null, null, null, null); +-- 
result: +-- !result +[UC] analyze full table t0; +-- result: +-- !result +function: wait_global_dict_ready('c0', 't0') +-- result: + +-- !result +delete from t0 where c0 = "s_1"; +-- result: +-- !result +select count(*) from t0; +-- result: +65536 +-- !result +select count(*) from t0 where c0 = "s_2"; +-- result: +0 +-- !result +select count(*) from t0 where c0 != "s_2"; +-- result: +65535 +-- !result +select count(*) from t0 where c0 is null; +-- result: +1 +-- !result +select count(*) from t0 where c0 = "s_2" and c1 = "s_2"; +-- result: +0 +-- !result +select count(*) from t0 where c0 = "s_2" and c1 = "s_3"; +-- result: +0 +-- !result +select distinct c0, c1, c2, c3 from t0 where c0 = "s_2" and c1 = "s_2" order by 1,2,3,4 limit 5; +-- result: +-- !result +select distinct c0, c1, c2, c3 from t0 where c1 = "s_2" order by 1,2,3,4 limit 5; +-- result: +-- !result +delete from t0 where c0 is null; +-- result: +-- !result +select count(*) from t0; +-- result: +65535 +-- !result +select count(*), count(c0) from t0 where c0 is not null; +-- result: +65535 65535 +-- !result +select count(*), count(c0) from t0 where c0 is not null; +-- result: +65535 65535 +-- !result +select count(*), count(c0) from t0 where c1 is not null; +-- result: +65535 65535 +-- !result +select count(*), count(c0) from t0 where c2 is not null; +-- result: +65535 65535 +-- !result +select count(*), count(c0), count(distinct c0) from t0 where c2 is not null; +-- result: +65535 65535 100 +-- !result +delete from t0 where c0 = "s_2"; +-- result: +-- !result +select count(*) from t0 where c0 = "s_2"; +-- result: +0 +-- !result +select count(*) from t0 where c0 != "s_2"; +-- result: +65535 +-- !result +select count(*) from t0 where c1 = "s_2"; +-- result: +0 +-- !result +select count(*) from t0 where c1 = "s_2" or c0 = "s_2"; +-- result: +0 +-- !result +delete from t0 where c2 = 1000; +-- result: +-- !result +select count(*), count(c0), count(distinct c0), max(c0) from t0; +-- result: +65535 65535 100 99 
+-- !result +select count(*) from t0 where c0 = "s_3"; +-- result: +0 +-- !result +select count(*) from t0 where c1 = "s_1000"; +-- result: +0 +-- !result +select count(*) from t0 where c2 = 1000; +-- result: +0 +-- !result +delete from t0 where c0 != ""; +-- result: +-- !result +select count(*) from t0; +-- result: +0 +-- !result +select count(*) from t0 where c0 = "s_3"; +-- result: +0 +-- !result +select count(*) from t0 where c1 = "s_3"; +-- result: +0 +-- !result +select count(*) from t0 where c2 = 4; +-- result: +0 +-- !result \ No newline at end of file diff --git a/test/sql/test_low_cardinality/T/test_delete_conditions b/test/sql/test_low_cardinality/T/test_delete_conditions new file mode 100644 index 00000000000000..56316fca9ac094 --- /dev/null +++ b/test/sql/test_low_cardinality/T/test_delete_conditions @@ -0,0 +1,55 @@ +-- name: test_delete_condition +create table t0 ( + c0 string, + c1 string, + c2 int, + c3 int +) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 1 PROPERTIES('replication_num' = '1'); + +-- analyze and wait dictionary +-- insert +insert into t0 SELECT generate_series%100, generate_series%100, generate_series%100, generate_series%100 FROM TABLE(generate_series(1, 65535)); +insert into t0 values (null, null, null, null); +[UC] analyze full table t0; +function: wait_global_dict_ready('c0', 't0') + +-- delete by key +delete from t0 where c0 = "s_1"; + +select count(*) from t0; +select count(*) from t0 where c0 = "s_2"; +select count(*) from t0 where c0 != "s_2"; +select count(*) from t0 where c0 is null; +select count(*) from t0 where c0 = "s_2" and c1 = "s_2"; +select count(*) from t0 where c0 = "s_2" and c1 = "s_3"; +select distinct c0, c1, c2, c3 from t0 where c0 = "s_2" and c1 = "s_2" order by 1,2,3,4 limit 5; +select distinct c0, c1, c2, c3 from t0 where c1 = "s_2" order by 1,2,3,4 limit 5; + +-- delete null +delete from t0 where c0 is null; +select count(*) from t0; +select count(*), count(c0) from t0 where c0 is not null; +select 
count(*), count(c0) from t0 where c0 is not null; +select count(*), count(c0) from t0 where c1 is not null; +select count(*), count(c0) from t0 where c2 is not null; +select count(*), count(c0), count(distinct c0) from t0 where c2 is not null; + +-- +delete from t0 where c0 = "s_2"; +select count(*) from t0 where c0 = "s_2"; +select count(*) from t0 where c0 != "s_2"; +select count(*) from t0 where c1 = "s_2"; +select count(*) from t0 where c1 = "s_2" or c0 = "s_2"; +-- delete by value +delete from t0 where c2 = 1000; +select count(*), count(c0), count(distinct c0), max(c0) from t0; +select count(*) from t0 where c0 = "s_3"; +select count(*) from t0 where c1 = "s_1000"; +select count(*) from t0 where c2 = 1000; + +-- delete all +delete from t0 where c0 != ""; +select count(*) from t0; +select count(*) from t0 where c0 = "s_3"; +select count(*) from t0 where c1 = "s_3"; +select count(*) from t0 where c2 = 4;