From a1a0505c7e56999e544e895d08f3a539977df4a9 Mon Sep 17 00:00:00 2001
From: drizk1
Date: Wed, 7 Aug 2024 12:10:58 -0400
Subject: [PATCH 1/5] add ClickHouse URL functionality

---
 NEWS.md         |  4 +++
 ext/CHExt.jl    | 56 ++++++++++++++++++++++++++++++------------
 src/TidierDB.jl | 65 ++++++++++++++++++++++++++++++++++++-------------
 src/structs.jl  |  9 ++++---
 4 files changed, 99 insertions(+), 35 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 4e4e74a..a866c60 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # TidierDB.jl updates
 
+## v0.3.2 - 2024-08-15
+- adds support for reading from URLs with ClickHouse
+- adds support for reading from multiple URLs at once as a vector of paths in `db_table` when using ClickHouse
+
 ## v0.3.1 - 2024-07-28
 - adds support for reading from multiple files at once as a vector of paths in `db_table` when using DuckDB
    - ie `db_table(db, ["path1", "path2"])`

diff --git a/ext/CHExt.jl b/ext/CHExt.jl
index decdc5a..bd94090 100644
--- a/ext/CHExt.jl
+++ b/ext/CHExt.jl
@@ -25,20 +25,46 @@ end
 
 # ClickHouse
 function TidierDB.get_table_metadata(conn::ClickHouse.ClickHouseSock, table_name::String)
-    # Query to get column names and types from INFORMATION_SCHEMA
-    query = """
-    SELECT
-        name AS column_name,
-        type AS data_type
-    FROM system.columns
-    WHERE table = '$table_name' AND database = 'default'
-    """
-    result = ClickHouse.select_df(conn,query)
-
-    result[!, :current_selxn] .= 1
-    result[!, :table_name] .= table_name
-    # Adjust the select statement to include the new table_name column
-    return select(result, 1 => :name, 2 => :type, :current_selxn, :table_name)
+    if occursin("/", table_name) || occursin("http", table_name)
+        # Path or URL: ask ClickHouse to DESCRIBE the remote file to infer its schema.
+        # enable_url_encoding=0 keeps pre-encoded URLs intact; redirects are followed.
+        query = "DESCRIBE url($table_name)
+        SETTINGS enable_url_encoding=0,
+                 max_http_get_redirects=10
+        "
+        column_info = ClickHouse.select_df(conn, query)
+        column_info = select(column_info, :name, :type)
+
+        # Add the table name and selection marker
+        column_info[!, :current_selxn] .= 1
+        table_name = if occursin(r"[:/]", table_name)
+            # Use the file's basename (without extension) as the table name
+            split(basename(table_name), '.')[1]
+        else
+            table_name
+        end
+        column_info[!, :table_name] .= table_name
+
+    else
+        # Standard case: query system.columns for the table's schema
+        query = """
+        SELECT
+            name AS column_name,
+            type AS data_type
+        FROM system.columns
+        WHERE table = '$table_name' AND database = 'default'
+        """
+        column_info = ClickHouse.select_df(conn, query)
+
+        # Add the table name and selection marker
+        column_info[!, :current_selxn] .= 1
+        column_info[!, :table_name] .= table_name
+    end
+    # Return the result with relevant columns
+    return select(column_info, 1 => :name, 2 => :type, :current_selxn, :table_name)
 end
 
 
@@ -51,4 +77,4 @@ function TidierDB.final_collect(sqlquery, ::Type{<:clickhouse})
         return df_result
 end
 
-end
+end
\ No newline at end of file
diff --git a/src/TidierDB.jl b/src/TidierDB.jl
index ae5d614..bb5731a 100644
--- a/src/TidierDB.jl
+++ b/src/TidierDB.jl
@@ -14,7 +14,6 @@ using GZip
 @reexport using Chain
 @reexport using DuckDB
 
-
 export db_table, set_sql_mode, @arrange, @group_by, @filter, @select, @mutate, @summarize, @summarise,
 @distinct, @left_join, @right_join, @inner_join, @count, @window_order, @window_frame, @show_query, @collect, @slice_max,
 @slice_min, @slice_sample, @rename, copy_to, duckdb_open, duckdb_connect, @semi_join, @full_join,
@@ -144,8 +143,11 @@ function finalize_query(sqlquery::SQLQuery)
     if !isempty(sqlquery.groupBy) push!(query_parts, "" * sqlquery.groupBy) end
     if !isempty(sqlquery.having) push!(query_parts, " " * sqlquery.having) end
     if !isempty(sqlquery.orderBy) push!(query_parts, " " * sqlquery.orderBy) end
-    complete_query = join(filter(!isempty, query_parts), " ")
+
+    if !isempty(sqlquery.ch_settings) && current_sql_mode[] == clickhouse()
+        complete_query = complete_query * " \n " * string(sqlquery.ch_settings)
+    end
     complete_query = replace(complete_query, "&&" => " AND ", "||" => " OR ", "FROM )" => ")" , "SELECT SELECT " => "SELECT ", "SELECT SELECT " => "SELECT ", "DISTINCT SELECT " => "DISTINCT ", "SELECT SELECT SELECT " => "SELECT ", "PARTITION BY GROUP BY" => "PARTITION BY", "GROUP BY GROUP BY" => "GROUP BY", "HAVING HAVING" => "HAVING", )
@@ -223,11 +225,14 @@ function db_table(db, table, athena_params::Any=nothing; iceberg::Bool=false, de
     else
         error("Unsupported SQL mode: $(current_sql_mode[])")
     end
-
+    clickhouse_settings = ""
     formatted_table_name = if current_sql_mode[] == snowflake()
         "$(db.database).$(db.schema).$table_name"
     elseif db isa DatabricksConnection || current_sql_mode[] == databricks()
         "$(db.database).$(db.schema).$table_name"
+    elseif current_sql_mode[] == clickhouse() && occursin(r"[:/]", table_name)
+        clickhouse_settings = " SETTINGS enable_url_encoding=0, max_http_get_redirects=10 "
+        "url('$table_name')"
     elseif iceberg
         "iceberg_scan('$table_name', allow_moved_paths = true)"
     elseif delta
@@ -240,7 +245,7 @@ function db_table(db, table, athena_params::Any=nothing; iceberg::Bool=false, de
         table_name
     end
 
-    return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params)
+    return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params, ch_settings=clickhouse_settings)
 end
 
 function db_table(db, table::Vector{String}, athena_params::Any=nothing)
@@ -249,23 +254,49 @@ function db_table(db, table::Vector{String}, athena_params::Any=nothing)
     end
 
     # Get file type from the first file
-    file_type = lowercase(splitext(first(table))[2])
-    # Format paths: wrap each in single quotes and join with commas
-    formatted_paths = join(map(path -> "'$path'", table), ", ")
+    # Check the current SQL mode
+    if current_sql_mode[] == duckdb()
+        file_type = lowercase(splitext(first(table))[2])
+
+        # Format paths: wrap each in single quotes and join with commas
+        formatted_paths = join(map(path -> "'$path'", table), ", ")
+
+        formatted_table_name = if file_type == ".csv"
+            "read_csv([$formatted_paths])"
+        elseif file_type == ".parquet"
+            "read_parquet([$formatted_paths])"
+        else
+            error("Unsupported file type: $file_type")
+        end
+
+        # Get metadata from the first file
+        meta_vec = first(table)
+        metadata = get_table_metadata(db, "'$meta_vec'")
+
+        return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params)
+
+    elseif current_sql_mode[] == clickhouse()
+
+        # Construct the ClickHouse SQL query with UNION ALL for each file
+        union_queries = join(map(path -> """
+            SELECT *
+            FROM url('$path')
+        """, table), " UNION ALL ")
+
+        # Wrap the union_queries in a subquery for further processing
+        formatted_table_name = "($union_queries)"
+        # Default to no settings so `clickhouse_settings` is always defined
+        clickhouse_settings = ""
+        if occursin(r"[:/]", first(table))
+            clickhouse_settings = " SETTINGS enable_url_encoding=0, max_http_get_redirects=10 "
+        end
+        meta_vec = first(table)
+        metadata = get_table_metadata(db, "'$meta_vec'")
+
+        return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params, ch_settings = clickhouse_settings)
 
-    formatted_table_name = if file_type == ".csv"
-        "read_csv([$formatted_paths])"
-    elseif file_type == ".parquet"
-        "read_parquet([$formatted_paths])"
     else
-        error("Unsupported file type: $file_type")
+        error("Unsupported SQL mode: $(current_sql_mode[])")
     end
-    meta_vec = first(table)
-    # Get metadata from the first file
-    metadata = get_table_metadata(db, "'$meta_vec'")
-
-    return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params)
 end
 
 """
diff --git a/src/structs.jl b/src/structs.jl
index 1cc0c50..96b8081 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -27,9 +27,12 @@ mutable struct SQLQuery
     ctes::Vector{CTE}
     cte_count::Int
     athena_params::Any
-
-    function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="", window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(), distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0, athena_params::Any=nothing)
-        new(select, from, where, groupBy, orderBy, having, window_order, windowFrame, is_aggregated, post_aggregation, metadata, distinct, db, ctes, cte_count, athena_params)
+    ch_settings::String
+    function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="",
+        window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(),
+        distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0, athena_params::Any=nothing, ch_settings::String="")
+        new(select, from, where, groupBy, orderBy, having, window_order, windowFrame, is_aggregated, post_aggregation,
+            metadata, distinct, db, ctes, cte_count, athena_params, ch_settings)
     end
 end

From 3798d67fef7a6bbd1609962606004414210cda0d Mon Sep 17 00:00:00 2001
From: drizk1
Date: Sat, 10 Aug 2024 11:50:37 -0400
Subject: [PATCH 2/5] clarify `*` file reading support in docs and docstrings
 for duckdb

---
 src/docstrings.jl | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/docstrings.jl b/src/docstrings.jl
index b8d8443..61f5a7f 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -1049,19 +1049,31 @@ const docstring_db_table =
 """
     db_table(database, table_name, athena_params, delta = false, iceberg = false)
 
-`db_table` starts the underlying SQL query struct, adding the metadata and table.
+`db_table` starts the underlying SQL query struct, adding the metadata and table. If paths are passed directly to `db_table` instead of a
+name, it will not copy them into memory, but rather read directly from the file.
+
 
 # Arguments
 `database`: The Database or connection object
 `table_name`: tablename as a string. Table name can be a name of a table on the database or paths to the following types
-   -CSV
-   -Parquet
-   -Json
-   -Iceberg
-   -Delta
-   -S3 tables from AWS or Google Cloud
-   - vector of CSV or Parquet paths to read multiple at once
+   --CSV
+
+   --Parquet
+
+   --Json
+
+   --Iceberg
+
+   --Delta
+
+   --S3 tables from AWS or Google Cloud
+
+   --vector of CSV or Parquet paths to read multiple at once
+DuckDB and ClickHouse support vectors of paths and URLs.
+DuckDB also supports use of `*` wildcards to read all files of a type in a location such as:
+`db_table(db, "Path/to/testing_files/*.parquet")`
+
 `delta`: must be true to read delta files
 `iceberg`: must be true to read iceberg finalize_ctes

From 6e4d8424608e35d9875bec85ac970f757800e836 Mon Sep 17 00:00:00 2001
From: drizk1
Date: Sat, 10 Aug 2024 19:34:12 -0400
Subject: [PATCH 3/5] fix struct

---
 src/structs.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/structs.jl b/src/structs.jl
index 975b140..ba55263 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -27,9 +27,9 @@ mutable struct SQLQuery
     ctes::Vector{CTE}
     cte_count::Int
     athena_params::Any
-    limit::String
-
+    limit::String
+    ch_settings::String
 
     function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="",
         window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(),
         distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0, athena_params::Any=nothing, limit::String="",

From 9573fd4af13522ff87739146ab6101d518dc9ca8 Mon Sep 17 00:00:00 2001
From: drizk1
Date: Sun, 11 Aug 2024 13:03:22 -0400
Subject: [PATCH 4/5] fix from query for CH, add `db_table` section in getting
 started, preview intermediates, count bugfix

---
 NEWS.md                                    |  5 ++-
 docs/examples/UserGuide/from_queryex.jl    | 47 ++++++++++++++++++++++
 docs/examples/UserGuide/getting_started.jl | 17 ++++++--
 src/TBD_macros.jl                          | 11 +++++
 src/docstrings.jl                          |  5 +--
 src/structs.jl                             |  6 ++-
 6 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index bb50831..5e9a428 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,7 +6,10 @@
 - adds support for reading URLs in `db_table` with ClickHouse
 - adds support for reading from multiple files at once as a vector of URLs in `db_table` when using ClickHouse
    - ie `db_table(db, ["url1", "url2"])`
-
+- adds docs around using `*` for reading in multiple files from a folder
+- adds docs for `db_table`
+- adds docs for previewing or saving intermediate tables in an ongoing `@chain`
+- Bugfix: `@count` updates metadata
 ## v0.3.1 - 2024-07-28

diff --git a/docs/examples/UserGuide/from_queryex.jl b/docs/examples/UserGuide/from_queryex.jl
index 785233a..9ab67db 100644
--- a/docs/examples/UserGuide/from_queryex.jl
+++ b/docs/examples/UserGuide/from_queryex.jl
@@ -59,4 +59,51 @@
 #   1 │ Pontiac Firebird      8    19.2
 #   2 │ Toyota Corolla        4    33.9
 #   3 │ Hornet 4 Drive        6    21.4
+# ```
+
+# ## Preview an intermediate table
+# While querying a dataset, you may wish to see an intermediate table, or even save it. You can use `@aside` and `from_query(_)`, illustrated below, to do just that.
+# While we opted to print the results in this simple example below, we could have saved them by using `name = DB.@chain...`
+
+# ```julia
+# import ClickHouse;
+# conn = DB.connect(DB.clickhouse(); host="localhost", port=19000, database="default", user="default", password="")
+# path = "https://huggingface.co/datasets/maharshipandya/spotify-tracks-dataset/resolve/refs%2Fconvert%2Fparquet/default/train/0000.parquet"
+# DB.@chain DB.db_table(conn, path) begin
+#    DB.@count(artists)
+#    @aside println(DB.@chain DB.from_query(_) DB.@head(5) DB.@collect)
+#    DB.@arrange(desc(count))
+#    DB.@collect
+# end
+# ```
+# ```
+# 5×2 DataFrame
+#  Row │ artists  count
+#      │ String?  UInt64
+# ─────┼─────────────────
+#    1 │ missing       1
+#    2 │ Wizo          3
+#    3 │ MAGIC!        3
+#    4 │ Macaco        1
+#    5 │ SOYOU         1
+# 31438×2 DataFrame
+#    Row │ artists          count
+#        │ String?          UInt64
+# ───────┼─────────────────────────
+#      1 │ The Beatles        279
+#      2 │ George Jones       271
+#      3 │ Stevie Wonder      236
+#      4 │ Linkin Park        224
+#      5 │ Ella Fitzgerald    222
+#      6 │ Prateek Kuhad      217
+#      7 │ Feid               202
+#    ⋮   │        ⋮            ⋮
+#  31432 │ Leonard              1
+#  31433 │ marcos g             1
+#  31434 │ BLVKSHP              1
+#  31435 │ Memtrix              1
+#  31436 │ SOYOU                1
+#  31437 │ Macaco               1
+#  31438 │ missing              1
+# 31424 rows omitted
+# ```
\ No newline at end of file
diff --git a/docs/examples/UserGuide/getting_started.jl b/docs/examples/UserGuide/getting_started.jl
index 0d2299a..1376b33 100644
--- a/docs/examples/UserGuide/getting_started.jl
+++ b/docs/examples/UserGuide/getting_started.jl
@@ -14,17 +14,17 @@
 # For example
 # Connecting to MySQL
 # ```julia
-# conn = connect(mysql(); host="localhost", user="root", password="password", db="mydb")
+# conn = DB.connect(DB.mysql(); host="localhost", user="root", password="password", db="mydb")
 # ```
 # versus connecting to DuckDB
 # ```julia
-# conn = connect(duckdb())
+# conn = DB.connect(DB.duckdb())
 # ```
 
 # ## Package Extensions
 # The following backends utilize package extensions. To use one of the backends listed below, you will need to write `using Library`
-# - ClickHouse: `using ClickHouse`
+# - ClickHouse: `import ClickHouse`
 # - MySQL and MariaDB: `using MySQL`
 # - MSSQL: `using ODBC`
 # - Postgres: `using LibPQ`
@@ -33,3 +33,14 @@
 # - Oracle: `using ODBC`
 # - Google BigQuery: `using GoogleCloud`
 
+# ## `db_table`
+# What does `db_table` do?
+# `db_table` starts the underlying SQL query struct, in addition to pulling the table metadata and storing it there. Storing the metadata is what enables a lazy interface that also supports tidy selection.
+# `db_table` has two required arguments: `connection` and `table`
+# `table` can be a table name on a database or a path/URL to a file to read. When passing `db_table` a path or URL, the table is not copied into memory.
+# With DuckDB and ClickHouse, if you have a folder of multiple files to read, you can use `*` to read in all files matching the pattern.
+# For example, the line below would read all files that end in `.csv` in the given folder.
+# ```julia
+# db_table(db, "folder/path/*.csv")
+# ```
+# `db_table` also supports iceberg, delta, and S3 file paths via DuckDB.
\ No newline at end of file
diff --git a/src/TBD_macros.jl b/src/TBD_macros.jl
index 76961df..2cdd154 100644
--- a/src/TBD_macros.jl
+++ b/src/TBD_macros.jl
@@ -455,15 +455,26 @@ macro count(sqlquery, group_by_columns...)
     group_clause = join(group_by_cols_str, ", ")
 
     return quote
+
         sq = $(esc(sqlquery))
         if isa(sq, SQLQuery)
             # Interpolate `group_clause` directly into the quoted code to avoid scope issues
             if !isempty($group_clause)
+                $(esc(sqlquery)).metadata.current_selxn .= 0
+                for col in $group_by_cols_str
+                    matching_indices = findall($(esc(sqlquery)).metadata.name .== col)
+                    $(esc(sqlquery)).metadata.current_selxn[matching_indices] .= 1
+                end
                 sq.select = "SELECT " * $group_clause * ", COUNT(*) AS count"
                 sq.groupBy = "GROUP BY " * $group_clause
+                push!(sq.metadata, Dict("name" => "count", "type" => "UNKNOWN", "current_selxn" => 1, "table_name" => sq.from))
+
             else
                 # If no grouping columns are specified, just count all records
+                $(esc(sqlquery)).metadata.current_selxn .= 0
                 sq.select = "SELECT COUNT(*) AS count"
+                push!(sq.metadata, Dict("name" => "count", "type" => "UNKNOWN", "current_selxn" => 1, "table_name" => sq.from))
+
             end
             # Adjustments for previously set GROUP BY or ORDER BY clauses might be needed here
diff --git a/src/docstrings.jl b/src/docstrings.jl
index 1e361c7..d41f7d1 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -1052,9 +1052,7 @@ const docstring_db_table =
 `db_table` starts the underlying SQL query struct, adding the metadata and table. If paths are passed directly to `db_table` instead of a
 name, it will not copy them into memory, but rather read directly from the file.
 
-
 # Arguments
-
 `database`: The Database or connection object
 `table_name`: tablename as a string. Table name can be a name of a table on the database or paths to the following types
 --CSV
 --Parquet
 --Json
 --Iceberg
 --Delta
 --S3 tables from AWS or Google Cloud
 --vector of CSV or Parquet paths to read multiple at once
 DuckDB and ClickHouse support vectors of paths and URLs.
-DuckDB also supports use of `*` wildcards to read all files of a type in a location such as:
+DuckDB and ClickHouse also support use of `*` wildcards to read all files of a type in a location such as:
 `db_table(db, "Path/to/testing_files/*.parquet")`
-
 `delta`: must be true to read delta files
 `iceberg`: must be true to read iceberg finalize_ctes
diff --git a/src/structs.jl b/src/structs.jl
index ba55263..77e786d 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -29,7 +29,7 @@ mutable struct SQLQuery
     athena_params::Any
     limit::String
     ch_settings::String
-    
+
     function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="",
         window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(),
         distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0, athena_params::Any=nothing, limit::String="",
@@ -96,7 +96,9 @@ function from_query(query::TidierDB.SQLQuery)
         db=query.db,
         ctes=[copy(cte) for cte in query.ctes],
         cte_count=query.cte_count,
-        athena_params = query.athena_params
+        athena_params = query.athena_params,
+        limit = query.limit,
+        ch_settings = query.ch_settings
     )
     return new_query
 end
\ No newline at end of file

From fb1df4758a6883b71de29a91770bfa9e6d66b881 Mon Sep 17 00:00:00 2001
From: drizk1
Date: Sun, 11 Aug 2024 13:27:04 -0400
Subject: [PATCH 5/5] tiny doc tweak

---
 docs/examples/UserGuide/from_queryex.jl        |  2 +-
 .../UserGuide/functions_pass_to_DB.jl          |  1 +
 docs/examples/UserGuide/getting_started.jl     |  3 ++
 src/docstrings.jl                              | 33 ++++++++-----------
 4 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/docs/examples/UserGuide/from_queryex.jl b/docs/examples/UserGuide/from_queryex.jl
index 9ab67db..8a39597 100644
--- a/docs/examples/UserGuide/from_queryex.jl
+++ b/docs/examples/UserGuide/from_queryex.jl
@@ -61,7 +61,7 @@
 #   3 │ Hornet 4 Drive        6    21.4
 # ```
 
-# ## Preview an intermediate table
+# ## Preview or save an intermediate table
 # While querying a dataset, you may wish to see an intermediate table, or even save it. You can use `@aside` and `from_query(_)`, illustrated below, to do just that.
 # While we opted to print the results in this simple example below, we could have saved them by using `name = DB.@chain...`
 
diff --git a/docs/examples/UserGuide/functions_pass_to_DB.jl b/docs/examples/UserGuide/functions_pass_to_DB.jl
index d1940c9..e204de0 100644
--- a/docs/examples/UserGuide/functions_pass_to_DB.jl
+++ b/docs/examples/UserGuide/functions_pass_to_DB.jl
@@ -10,6 +10,7 @@
 # ```
 # using TidierDB
+# db = connect(duckdb())
 # path = "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv"
 # copy_to(db, path, "mtcars");
 #
diff --git a/docs/examples/UserGuide/getting_started.jl b/docs/examples/UserGuide/getting_started.jl
index 1376b33..94d432d 100644
--- a/docs/examples/UserGuide/getting_started.jl
+++ b/docs/examples/UserGuide/getting_started.jl
@@ -9,6 +9,7 @@
 
 # Alternatively, `using Tidier` will import TidierDB in the above manner for you, where TidierDB functions and macros will be available as `DB.@mutate()` and so on, and the TidierData equivalent would be `@mutate()`.
 
+# ## Connecting
 # To connect to a database, you can use the `connect` function as shown below, or establish your own connection through the respective libraries.
 # For example
 # Connecting to MySQL
 # ```julia
 # conn = DB.connect(DB.mysql(); host="localhost", user="root", password="password", db="mydb")
 # ```
@@ -21,6 +22,8 @@
 # conn = DB.connect(DB.duckdb())
 # ```
 
+# You can also establish a connection through an alternate method that you prefer, and use that as your connection as well.
+
 # ## Package Extensions
 # The following backends utilize package extensions. To use one of the backends listed below, you will need to write `using Library`
diff --git a/src/docstrings.jl b/src/docstrings.jl
index d41f7d1..556556e 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -1053,26 +1053,19 @@ const docstring_db_table =
 name, it will not copy them into memory, but rather read directly from the file.
 
 # Arguments
-`database`: The Database or connection object
-`table_name`: tablename as a string. Table name can be a name of a table on the database or paths to the following types
- --CSV
-
- --Parquet
-
- --Json
-
- --Iceberg
-
- --Delta
-
- --S3 tables from AWS or Google Cloud
-
- --vector of CSV or Parquet paths to read multiple at once
-DuckDB and ClickHouse support vectors of paths and URLs.
-DuckDB and ClickHouse also support use of `*` wildcards to read all files of a type in a location such as:
-`db_table(db, "Path/to/testing_files/*.parquet")`
-`delta`: must be true to read delta files
-`iceberg`: must be true to read iceberg finalize_ctes
+- `database`: The Database or connection object
+- `table_name`: table name as a string (name, local path, or URL).
+   - CSV/TSV
+   - Parquet
+   - Json
+   - Iceberg
+   - Delta
+   - S3 tables from AWS or Google Cloud
+   - DuckDB and ClickHouse support vectors of paths and URLs.
+   - DuckDB and ClickHouse also support use of `*` wildcards to read all files of a type in a location such as:
+- `db_table(db, "Path/to/testing_files/*.parquet")`
+- `delta`: must be true to read delta files
+- `iceberg`: must be true to read iceberg

 # Example
 ```julia
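 # A minimal usage sketch (illustrative only; this is not the file's verbatim
 # doctest). It assumes a DuckDB connection and uses only calls shown elsewhere
 # in these docs; "path/to/mtcars.csv" is a hypothetical local path.
 db = connect(duckdb())
 copy_to(db, "path/to/mtcars.csv", "mtcars")   # copy a local file in as a table
 db_table(db, "mtcars")                        # start a lazy query from a table on the database
 db_table(db, "folder/path/*.csv")             # or read files directly, without copying into memory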