merge with main

carlopi · Jul 10, 2024 · c185b74 · c185b74
2 parents 5fe5016 + 104cb79
commit c185b74
Show file tree

Hide file tree

Showing 228 changed files with 26,013 additions and 6,719 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -46,7 +46,20 @@ body:
   - type: textarea
     attributes:
       label: To Reproduce
-      description: Steps to reproduce the behavior, preferably a [minimal reproducible example](https://en.wikipedia.org/wiki/Minimal_reproducible_example). Please format the code and the output as [code blocks](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks) and add the required imports for scripts (e.g., `import duckdb`). Bonus points if the steps only include SQL queries.
+      description: |
+        Please provide steps to reproduce the behavior, preferably a [minimal reproducible example](https://en.wikipedia.org/wiki/Minimal_reproducible_example). Please adhere to the following guidelines:
+
+        * Format the code and the output as [code blocks](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks) using triple backticks:
+
+          ````
+          ```
+          CODE HERE
+          ```
+          ````
+        * Add all required imports for scripts, e.g., `import duckdb`, `import pandas as pd`.
+        * Remove all prompts from the scripts. This includes DuckDB's 'D' prompt and Python's `>>>` prompt. Removing these prompts makes reproduction attempts quicker.
+        * Make sure that the script and its outputs are provided in separate code blocks.
+        * If applicable, please check whether the issue is reproducible via running plain SQL queries from the DuckDB CLI client.
     validations:
       required: true
 
@@ -85,7 +98,7 @@ body:
   - type: input
     attributes:
       label: "Affiliation:"
-      placeholder: e.g., Oracle
+      placeholder: e.g., Acme Corporation
     validations:
       required: true
 

diff --git a/.github/workflows/LinuxRelease.yml b/.github/workflows/LinuxRelease.yml
@@ -64,6 +64,7 @@ jobs:
       DEBUG_STACKTRACE: 1
       FORCE_WARN_UNUSED: 1
       DUCKDB_RUN_PARALLEL_CSV_TESTS: 1
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 
     steps:
     - name: Handrolled checkout
@@ -153,6 +154,7 @@ jobs:
      TREAT_WARNINGS_AS_ERRORS: 1
      FORCE_WARN_UNUSED: 1
      DUCKDB_PLATFORM: linux_arm64
+     ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 
    steps:
      - uses: actions/checkout@v3
@@ -196,6 +198,8 @@ jobs:
     runs-on: ubuntu-latest
     container: ubuntu:18.04
     needs: linux-release-64
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 
     steps:
     - uses: actions/checkout@v3
@@ -236,6 +240,8 @@ jobs:
     runs-on: ubuntu-latest
     container: ubuntu:18.04
     needs: linux-release-64
+    env:
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 
     steps:
       - uses: actions/checkout@v3

diff --git a/.github/workflows/Python.yml b/.github/workflows/Python.yml
@@ -121,6 +121,7 @@ jobs:
     needs: linux-python3-9
     env:
       GEN: ninja
+      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
 
     steps:
     - uses: actions/checkout@v3

diff --git a/.github/workflows/Regression.yml b/.github/workflows/Regression.yml
@@ -43,6 +43,7 @@ jobs:
     CXX: g++-10
     GEN: ninja
     BUILD_BENCHMARK: 1
+    BUILD_INET: 1
     BUILD_TPCH: 1
     BUILD_TPCDS: 1
     BUILD_HTTPFS: 1

diff --git a/benchmark/micro/join/join_order_optimizer_should_respect_limit.benchmark b/benchmark/micro/join/join_order_optimizer_should_respect_limit.benchmark
@@ -0,0 +1,13 @@
+# name: benchmark/micro/join/join_order_optimizer_should_respect_limit.benchmark
+# description: If a constant value limit operator exists, it should influence the estimated cardinality
+# group: [join]
+
+name join limit
+group join
+
+load
+create table t_left as select (random() * 1000000000)::INT a from range(400000);
+create table t_right as select range b from range(1000000000);
+
+run
+select * from t_left, (select * from t_right limit 10000) where a = b;
diff --git a/benchmark/micro/string/inet_escape_function.benchmark b/benchmark/micro/string/inet_escape_function.benchmark
@@ -0,0 +1,14 @@
+# name: benchmark/micro/string/inet_escape_function.benchmark
+# description: Benchmark for the inet extension's html_escape function
+# group: [string]
+
+name html_escape benchmark
+group string
+
+require inet
+
+load
+CREATE TABLE html_text_tbl AS SELECT repeat('&', i%10) html_text FROM range(1000000) t(i);
+
+run
+SELECT html_escape(html_text) FROM html_text_tbl;
diff --git a/benchmark/micro/string/inet_unescape_charrefs.benchmark b/benchmark/micro/string/inet_unescape_charrefs.benchmark
@@ -0,0 +1,16 @@
+# name: benchmark/micro/string/inet_unescape_charrefs.benchmark
+# description: Benchmark for the inet extension's html_unescape function on character references
+# group: [string]
+
+name html_unescape benchmark for character references
+group string
+require inet
+
+load
+CREATE TABLE charrefs AS SELECT * FROM (VALUES ('&amp'), ('&Colon;'), ('&CounterClockwiseContourIntegral;'), ('&;'), ('&GreaterLess;'), ('&LeftArrowRightArrow;'), ('&downarrow;'), ('not &notin'), ('&quot;;'), ('&no charref'));
+INSERT INTO charrefs SELECT repeat('&nsucceq;', i%10) charref FROM range(1000) t(i);
+INSERT INTO charrefs SELECT repeat('&zfr;', i%10) charref FROM range(1000) t(i);
+INSERT INTO charrefs SELECT repeat('&Eacute;', i%7) html_text FROM range(997990) t(i);
+
+run
+SELECT html_unescape(charrefs.col0) FROM charrefs;
diff --git a/benchmark/micro/string/inet_unescape_codepoints.benchmark b/benchmark/micro/string/inet_unescape_codepoints.benchmark
@@ -0,0 +1,13 @@
+# name: benchmark/micro/string/inet_unescape_codepoints.benchmark
+# description: Benchmark for the inet extension's html_unescape function on hexadecimal code points
+# group: [string]
+
+name html_unescape benchmark with hexadecimal values
+group string
+require inet
+
+load
+CREATE TABLE html_hex_tbl AS SELECT format('&#x{:x}', i) html_text FROM range(1000000) t(i);
+
+run
+SELECT html_unescape(html_text) FROM html_hex_tbl;
diff --git a/benchmark/tpch/aggregate/top_n_in_group_agg.benchmark b/benchmark/tpch/aggregate/top_n_in_group_agg.benchmark
@@ -0,0 +1,12 @@
+# name: benchmark/tpch/aggregate/top_n_in_group_agg.benchmark
+# group: [aggregate]
+
+require tpch
+
+cache tpch_sf5.duckdb
+
+load
+CALL dbgen(sf=5);
+
+run
+SELECT max(l_extendedprice, 3) FROM lineitem GROUP BY l_suppkey ORDER BY ALL;
diff --git a/benchmark/tpch/aggregate/top_n_in_group_window.benchmark b/benchmark/tpch/aggregate/top_n_in_group_window.benchmark
@@ -0,0 +1,19 @@
+# name: benchmark/tpch/aggregate/top_n_in_group_window.benchmark
+# group: [aggregate]
+
+require tpch
+
+cache tpch_sf5.duckdb
+
+load
+CALL dbgen(sf=5);
+
+run
+SELECT rs.grp, array_agg(rs.val ORDER BY rid)
+FROM (
+  SELECT l_suppkey AS grp, l_extendedprice AS val, row_number() OVER (PARTITION BY l_suppkey ORDER BY l_extendedprice DESC) as rid
+  FROM lineitem ORDER BY l_suppkey DESC
+) as rs
+WHERE rid <= 3
+GROUP BY ALL
+ORDER BY ALL;
diff --git a/benchmark/tpch/join/join_filter_pushdown.benchmark b/benchmark/tpch/join/join_filter_pushdown.benchmark
@@ -0,0 +1,21 @@
+# name: benchmark/tpch/join/join_filter_pushdown.benchmark
+# description: Join filter pushdown
+# group: [join]
+
+name Join Filter Pushdown
+group join
+subgroup tpch
+
+require tpch
+
+cache tpch_sf1.duckdb
+
+load
+CALL dbgen(sf=1);
+
+run
+SELECT * from lineitem WHERE l_orderkey=(SELECT MAX(l_orderkey) FROM lineitem) ORDER BY ALL
+
+result IIIIIIIIIIIIIIII
+6000000	32255	2256	1	5.00	5936.25	0.04	0.03	N	O	1996-11-02	1996-11-19	1996-12-01	TAKE BACK RETURN	MAIL	riously pe
+6000000	96127	6128	2	28.00	31447.36	0.01	0.02	N	O	1996-09-22	1996-10-01	1996-10-21	NONE	AIR	pecial excuses nag evenly f
diff --git a/benchmark/tpch/join/partition_pushdown.benchmark b/benchmark/tpch/join/partition_pushdown.benchmark
@@ -0,0 +1,21 @@
+# name: benchmark/tpch/join/partition_pushdown.benchmark
+# description: Join filter pushdown into hive partitions
+# group: [join]
+
+name Hive Filter Join Filter Pushdown
+group join
+subgroup tpch
+
+require parquet
+
+require tpch
+
+load
+CALL dbgen(sf=1);
+COPY (FROM lineitem ORDER BY l_shipdate) TO 'lineitem_partitioned_shipdate' (FORMAT PARQUET, PARTITION_BY l_shipdate);
+
+run
+SELECT COUNT(*) from 'lineitem_partitioned_shipdate/**/*.parquet' WHERE l_shipdate=(SELECT MAX(l_shipdate) FROM lineitem)
+
+result I
+18