From a1a0505c7e56999e544e895d08f3a539977df4a9 Mon Sep 17 00:00:00 2001
From: drizk1
Date: Wed, 7 Aug 2024 12:10:58 -0400
Subject: [PATCH 1/5] add ClickHouse URL functionality

---
 NEWS.md         |  4 +++
 ext/CHExt.jl    | 56 ++++++++++++++++++++++++++++++------------
 src/TidierDB.jl | 65 ++++++++++++++++++++++++++++++++++++-------------
 src/structs.jl  |  9 ++++---
 4 files changed, 99 insertions(+), 35 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 4e4e74a..a866c60 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,9 @@
 # TidierDB.jl updates
 
+## v0.3.2 - 2024-08-15
+- adds support for reading from URLs with ClickHouse
+- adds support for reading from multiple URLs at once as a vector of paths in `db_table` when using ClickHouse
+
 ## v0.3.1 - 2024-07-28
 - adds support for reading from multiple files at once as a vector of paths in `db_table` when using DuckDB
    - ie `db_table(db, ["path1", "path2"])`

diff --git a/ext/CHExt.jl b/ext/CHExt.jl
index decdc5a..bd94090 100644
--- a/ext/CHExt.jl
+++ b/ext/CHExt.jl
@@ -25,20 +25,46 @@ end
 
 # ClickHouse
 function TidierDB.get_table_metadata(conn::ClickHouse.ClickHouseSock, table_name::String)
-    # Query to get column names and types from INFORMATION_SCHEMA
-    query = """
-    SELECT
-        name AS column_name,
-        type AS data_type
-    FROM system.columns
-    WHERE table = '$table_name' AND database = 'default'
-    """
-    result = ClickHouse.select_df(conn,query)
-
-    result[!, :current_selxn] .= 1
-    result[!, :table_name] .= table_name
-    # Adjust the select statement to include the new table_name column
-    return select(result, 1 => :name, 2 => :type, :current_selxn, :table_name)
+    if occursin("/", table_name) || occursin("http", table_name)
+        # Path or URL: ask ClickHouse to DESCRIBE the remote file to infer its schema.
+        # enable_url_encoding=0 keeps pre-encoded URLs intact; redirects are followed.
+        query = "DESCRIBE url($table_name)
+        SETTINGS enable_url_encoding=0,
+                 max_http_get_redirects=10
+        "
+        column_info = ClickHouse.select_df(conn, query)
+        column_info = select(column_info, :name, :type)
+
+        # Add the table name and selection marker
+        column_info[!, :current_selxn] .= 1
+        table_name = if occursin(r"[:/]", table_name)
+            # Use the file's basename (without extension) as the table name
+            split(basename(table_name), '.')[1]
+        else
+            table_name
+        end
+        column_info[!, :table_name] .= table_name
+
+    else
+        # Standard case: query system.columns for the table's schema
+        query = """
+        SELECT
+            name AS column_name,
+            type AS data_type
+        FROM system.columns
+        WHERE table = '$table_name' AND database = 'default'
+        """
+        column_info = ClickHouse.select_df(conn, query)
+
+        # Add the table name and selection marker
+        column_info[!, :current_selxn] .= 1
+        column_info[!, :table_name] .= table_name
+    end
+    # Return the result with relevant columns
+    return select(column_info, 1 => :name, 2 => :type, :current_selxn, :table_name)
 end
 
 
@@ -51,4 +77,4 @@ function TidierDB.final_collect(sqlquery, ::Type{<:clickhouse})
         return df_result
 end
 
-end
+end
\ No newline at end of file
diff --git a/src/TidierDB.jl b/src/TidierDB.jl
index ae5d614..bb5731a 100644
--- a/src/TidierDB.jl
+++ b/src/TidierDB.jl
@@ -14,7 +14,6 @@ using GZip
 @reexport using Chain
 @reexport using DuckDB
 
-
 export db_table, set_sql_mode, @arrange, @group_by, @filter, @select, @mutate, @summarize, @summarise,
 @distinct, @left_join, @right_join, @inner_join, @count, @window_order, @window_frame, @show_query, @collect, @slice_max,
 @slice_min, @slice_sample, @rename, copy_to, duckdb_open, duckdb_connect, @semi_join, @full_join,
@@ -144,8 +143,11 @@ function finalize_query(sqlquery::SQLQuery)
     if !isempty(sqlquery.groupBy) push!(query_parts, "" * sqlquery.groupBy) end
     if !isempty(sqlquery.having) push!(query_parts, " " * sqlquery.having) end
     if !isempty(sqlquery.orderBy) push!(query_parts, " " * sqlquery.orderBy) end
-    complete_query = join(filter(!isempty, query_parts), " ")
+
+    if !isempty(sqlquery.ch_settings) && current_sql_mode[] == clickhouse()
+        complete_query = complete_query * " \n " * string(sqlquery.ch_settings)
+    end
     complete_query = replace(complete_query, "&&" => " AND ", "||" => " OR ", "FROM )" => ")" , "SELECT SELECT " => "SELECT ", "SELECT SELECT " => "SELECT ", "DISTINCT SELECT " => "DISTINCT ", "SELECT SELECT SELECT " => "SELECT ", "PARTITION BY GROUP BY" => "PARTITION BY", "GROUP BY GROUP BY" => "GROUP BY", "HAVING HAVING" => "HAVING", )
@@ -223,11 +225,14 @@ function db_table(db, table, athena_params::Any=nothing; iceberg::Bool=false, de
     else
         error("Unsupported SQL mode: $(current_sql_mode[])")
     end
-
+    clickhouse_settings = ""
     formatted_table_name = if current_sql_mode[] == snowflake()
         "$(db.database).$(db.schema).$table_name"
     elseif db isa DatabricksConnection || current_sql_mode[] == databricks()
         "$(db.database).$(db.schema).$table_name"
+    elseif current_sql_mode[] == clickhouse() && occursin(r"[:/]", table_name)
+        clickhouse_settings = " SETTINGS enable_url_encoding=0, max_http_get_redirects=10 "
+        "url('$table_name')"
     elseif iceberg
         "iceberg_scan('$table_name', allow_moved_paths = true)"
     elseif delta
@@ -240,7 +245,7 @@ function db_table(db, table, athena_params::Any=nothing; iceberg::Bool=false, de
         table_name
     end
 
-    return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params)
+    return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params, ch_settings=clickhouse_settings)
 end
 
 function db_table(db, table::Vector{String}, athena_params::Any=nothing)
@@ -249,23 +254,49 @@ function db_table(db, table::Vector{String}, athena_params::Any=nothing)
     end
 
     # Get file type from the first file
-    file_type = lowercase(splitext(first(table))[2])
-    # Format paths: wrap each in single quotes and join with commas
-    formatted_paths = join(map(path -> "'$path'", table), ", ")
+    # Check the current SQL mode
+    if current_sql_mode[] == duckdb()
+        file_type = lowercase(splitext(first(table))[2])
+
+        # Format paths: wrap each in single quotes and join with commas
+        formatted_paths = join(map(path -> "'$path'", table), ", ")
+
+        formatted_table_name = if file_type == ".csv"
+            "read_csv([$formatted_paths])"
+        elseif file_type == ".parquet"
+            "read_parquet([$formatted_paths])"
+        else
+            error("Unsupported file type: $file_type")
+        end
+
+        # Get metadata from the first file
+        meta_vec = first(table)
+        metadata = get_table_metadata(db, "'$meta_vec'")
+
+        return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params)
+
+    elseif current_sql_mode[] == clickhouse()
+
+        # Construct the ClickHouse SQL query with UNION ALL for each file
+        union_queries = join(map(path -> """
+            SELECT *
+            FROM url('$path')
+        """, table), " UNION ALL ")
+
+        # Wrap the union_queries in a subquery for further processing
+        formatted_table_name = "($union_queries)"
+        # Default to no settings so `clickhouse_settings` is always defined
+        clickhouse_settings = ""
+        if occursin(r"[:/]", first(table))
+            clickhouse_settings = " SETTINGS enable_url_encoding=0, max_http_get_redirects=10 "
+        end
+        meta_vec = first(table)
+        metadata = get_table_metadata(db, "'$meta_vec'")
+
+        return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params, ch_settings = clickhouse_settings)
 
-    formatted_table_name = if file_type == ".csv"
-        "read_csv([$formatted_paths])"
-    elseif file_type == ".parquet"
-        "read_parquet([$formatted_paths])"
     else
-        error("Unsupported file type: $file_type")
+        error("Unsupported SQL mode: $(current_sql_mode[])")
     end
-    meta_vec = first(table)
-    # Get metadata from the first file
-    metadata = get_table_metadata(db, "'$meta_vec'")
-
-    return SQLQuery(from=formatted_table_name, metadata=metadata, db=db, athena_params=athena_params)
 end
 
 """
diff --git a/src/structs.jl b/src/structs.jl
index 1cc0c50..96b8081 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -27,9 +27,12 @@ mutable struct SQLQuery
     ctes::Vector{CTE}
     cte_count::Int
     athena_params::Any
-
-    function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="", window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(), distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0, athena_params::Any=nothing)
-        new(select, from, where, groupBy, orderBy, having, window_order, windowFrame, is_aggregated, post_aggregation, metadata, distinct, db, ctes, cte_count, athena_params)
+    ch_settings::String
+    function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="",
+        window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(),
+        distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0, athena_params::Any=nothing, ch_settings::String="")
+        new(select, from, where, groupBy, orderBy, having, window_order, windowFrame, is_aggregated, post_aggregation,
+            metadata, distinct, db, ctes, cte_count, athena_params, ch_settings)
     end
 end

From 3798d67fef7a6bbd1609962606004414210cda0d Mon Sep 17 00:00:00 2001
From: drizk1
Date: Sat, 10 Aug 2024 11:50:37 -0400
Subject: [PATCH 2/5] clarify `*` file reading support in docs and docstrings
 for duckdb

---
 src/docstrings.jl | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/docstrings.jl b/src/docstrings.jl
index b8d8443..61f5a7f 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -1049,19 +1049,31 @@ const docstring_db_table =
 """
     db_table(database, table_name, athena_params, delta = false, iceberg = false)
 
-`db_table` starts the underlying SQL query struct, adding the metadata and table.
+`db_table` starts the underlying SQL query struct, adding the metadata and table. If paths are passed directly to `db_table` instead of a
+name, it will not copy them into memory, but rather read directly from the file.
+
 
 # Arguments
 `database`: The Database or connection object
 `table_name`: tablename as a string. Table name can be a name of a table on the database or paths to the following types
-   -CSV
-   -Parquet
-   -Json
-   -Iceberg
-   -Delta
-   -S3 tables from AWS or Google Cloud
-   - vector of CSV or Parquet paths to read multiple at once
+   --CSV
+
+   --Parquet
+
+   --Json
+
+   --Iceberg
+
+   --Delta
+
+   --S3 tables from AWS or Google Cloud
+
+   --vector of CSV or Parquet paths to read multiple at once
+DuckDB and ClickHouse support vectors of paths and URLs.
+DuckDB also supports use of `*` wildcards to read all files of a type in a location such as:
+`db_table(db, "Path/to/testing_files/*.parquet")`
+
 `delta`: must be true to read delta files
 `iceberg`: must be true to read iceberg finalize_ctes

From 6e4d8424608e35d9875bec85ac970f757800e836 Mon Sep 17 00:00:00 2001
From: drizk1
Date: Sat, 10 Aug 2024 19:34:12 -0400
Subject: [PATCH 3/5] fix struct

---
 src/structs.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/structs.jl b/src/structs.jl
index 975b140..ba55263 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -27,9 +27,9 @@ mutable struct SQLQuery
     ctes::Vector{CTE}
     cte_count::Int
     athena_params::Any
-    limit::String
-
+    limit::String
+    ch_settings::String
 
     function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="",
         window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(),
         distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0, athena_params::Any=nothing, limit::String="",

From 9573fd4af13522ff87739146ab6101d518dc9ca8 Mon Sep 17 00:00:00 2001
From: drizk1
Date: Sun, 11 Aug 2024 13:03:22 -0400
Subject: [PATCH 4/5] fix from query for CH, add `db_table` section in getting
 started, preview intermediates, count bugfix

---
 NEWS.md                                    |  5 ++-
 docs/examples/UserGuide/from_queryex.jl    | 47 ++++++++++++++++++++++
 docs/examples/UserGuide/getting_started.jl | 17 ++++++--
 src/TBD_macros.jl                          | 11 +++++
 src/docstrings.jl                          |  5 +--
 src/structs.jl                             |  6 ++-
 6 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index bb50831..5e9a428 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,7 +6,10 @@
 - adds support for reading URLs in `db_table` with ClickHouse
 - adds support for reading from multiple files at once as a vector of URLs in `db_table` when using ClickHouse
    - ie `db_table(db, ["url1", "url2"])`
-
+- adds docs around using `*` for reading in multiple files from a folder
+- adds docs for `db_table`
+- adds docs for previewing or saving intermediate tables in an ongoing `@chain`
+- Bugfix: `@count` updates metadata
 ## v0.3.1 - 2024-07-28

diff --git a/docs/examples/UserGuide/from_queryex.jl b/docs/examples/UserGuide/from_queryex.jl
index 785233a..9ab67db 100644
--- a/docs/examples/UserGuide/from_queryex.jl
+++ b/docs/examples/UserGuide/from_queryex.jl
@@ -59,4 +59,51 @@
 #   1 │ Pontiac Firebird      8    19.2
 #   2 │ Toyota Corolla        4    33.9
 #   3 │ Hornet 4 Drive        6    21.4
+# ```
+
+# ## Preview an intermediate table
+# While querying a dataset, you may wish to see an intermediate table, or even save it. You can use `@aside` and `from_query(_)`, illustrated below, to do just that.
+# While we opted to print the results in this simple example below, we could have saved them by using `name = DB.@chain...`
+
+# ```julia
+# import ClickHouse;
+# conn = DB.connect(DB.clickhouse(); host="localhost", port=19000, database="default", user="default", password="")
+# path = "https://huggingface.co/datasets/maharshipandya/spotify-tracks-dataset/resolve/refs%2Fconvert%2Fparquet/default/train/0000.parquet"
+# DB.@chain DB.db_table(conn, path) begin
+#    DB.@count(artists)
+#    @aside println(DB.@chain DB.from_query(_) DB.@head(5) DB.@collect)
+#    DB.@arrange(desc(count))
+#    DB.@collect
+# end
+# ```
+# ```
+# 5×2 DataFrame
+#  Row │ artists  count
+#      │ String?  UInt64
+# ─────┼─────────────────
+#    1 │ missing       1
+#    2 │ Wizo          3
+#    3 │ MAGIC!        3
+#    4 │ Macaco        1
+#    5 │ SOYOU         1
+# 31438×2 DataFrame
+#    Row │ artists          count
+#        │ String?          UInt64
+# ───────┼─────────────────────────
+#      1 │ The Beatles        279
+#      2 │ George Jones       271
+#      3 │ Stevie Wonder      236
+#      4 │ Linkin Park        224
+#      5 │ Ella Fitzgerald    222
+#      6 │ Prateek Kuhad      217
+#      7 │ Feid               202
+#    ⋮   │        ⋮            ⋮
+#  31432 │ Leonard              1
+#  31433 │ marcos g             1
+#  31434 │ BLVKSHP              1
+#  31435 │ Memtrix              1
+#  31436 │ SOYOU                1
+#  31437 │ Macaco               1
+#  31438 │ missing              1
+# 31424 rows omitted
+# ```
\ No newline at end of file
diff --git a/docs/examples/UserGuide/getting_started.jl b/docs/examples/UserGuide/getting_started.jl
index 0d2299a..1376b33 100644
--- a/docs/examples/UserGuide/getting_started.jl
+++ b/docs/examples/UserGuide/getting_started.jl
@@ -14,17 +14,17 @@
 # For example
 # Connecting to MySQL
 # ```julia
-# conn = connect(mysql(); host="localhost", user="root", password="password", db="mydb")
+# conn = DB.connect(DB.mysql(); host="localhost", user="root", password="password", db="mydb")
 # ```
 # versus connecting to DuckDB
 # ```julia
-# conn = connect(duckdb())
+# conn = DB.connect(DB.duckdb())
 # ```
 
 # ## Package Extensions
 # The following backends utilize package extensions. To use one of the backends listed below, you will need to write `using Library`
-# - ClickHouse: `using ClickHouse`
+# - ClickHouse: `import ClickHouse`
 # - MySQL and MariaDB: `using MySQL`
 # - MSSQL: `using ODBC`
 # - Postgres: `using LibPQ`
@@ -33,3 +33,14 @@
 # - Oracle: `using ODBC`
 # - Google BigQuery: `using GoogleCloud`
 
+# ## `db_table`
+# What does `db_table` do?
+# `db_table` starts the underlying SQL query struct, in addition to pulling the table metadata and storing it there. Storing the metadata is what enables a lazy interface that also supports tidy selection.
+# `db_table` has two required arguments: `connection` and `table`
+# `table` can be a table name on a database or a path/URL to a file to read. When passing `db_table` a path or URL, the table is not copied into memory.
+# With DuckDB and ClickHouse, if you have a folder of multiple files to read, you can use `*` to read in all files matching the pattern.
+# For example, the line below would read all files that end in `.csv` in the given folder.
+# ```julia
+# db_table(db, "folder/path/*.csv")
+# ```
+# `db_table` also supports iceberg, delta, and S3 file paths via DuckDB.
\ No newline at end of file
diff --git a/src/TBD_macros.jl b/src/TBD_macros.jl
index 76961df..2cdd154 100644
--- a/src/TBD_macros.jl
+++ b/src/TBD_macros.jl
@@ -455,15 +455,26 @@ macro count(sqlquery, group_by_columns...)
     group_clause = join(group_by_cols_str, ", ")
 
     return quote
+
         sq = $(esc(sqlquery))
         if isa(sq, SQLQuery)
             # Interpolate `group_clause` directly into the quoted code to avoid scope issues
             if !isempty($group_clause)
+                $(esc(sqlquery)).metadata.current_selxn .= 0
+                for col in $group_by_cols_str
+                    matching_indices = findall($(esc(sqlquery)).metadata.name .== col)
+                    $(esc(sqlquery)).metadata.current_selxn[matching_indices] .= 1
+                end
                 sq.select = "SELECT " * $group_clause * ", COUNT(*) AS count"
                 sq.groupBy = "GROUP BY " * $group_clause
+                push!(sq.metadata, Dict("name" => "count", "type" => "UNKNOWN", "current_selxn" => 1, "table_name" => sq.from))
+
             else
                 # If no grouping columns are specified, just count all records
+                $(esc(sqlquery)).metadata.current_selxn .= 0
                 sq.select = "SELECT COUNT(*) AS count"
+                push!(sq.metadata, Dict("name" => "count", "type" => "UNKNOWN", "current_selxn" => 1, "table_name" => sq.from))
+
             end
             # Adjustments for previously set GROUP BY or ORDER BY clauses might be needed here
diff --git a/src/docstrings.jl b/src/docstrings.jl
index 1e361c7..d41f7d1 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -1052,9 +1052,7 @@ const docstring_db_table =
 `db_table` starts the underlying SQL query struct, adding the metadata and table. If paths are passed directly to `db_table` instead of a
 name, it will not copy them into memory, but rather read directly from the file.
 
-
 # Arguments
-
 `database`: The Database or connection object
 `table_name`: tablename as a string. Table name can be a name of a table on the database or paths to the following types
 --CSV
 --Parquet
 --Json
 --Iceberg
 --Delta
 --S3 tables from AWS or Google Cloud
 --vector of CSV or Parquet paths to read multiple at once
 DuckDB and ClickHouse support vectors of paths and URLs.
-DuckDB also supports use of `*` wildcards to read all files of a type in a location such as:
+DuckDB and ClickHouse also support use of `*` wildcards to read all files of a type in a location such as:
 `db_table(db, "Path/to/testing_files/*.parquet")`
-
 `delta`: must be true to read delta files
 `iceberg`: must be true to read iceberg finalize_ctes
diff --git a/src/structs.jl b/src/structs.jl
index ba55263..77e786d 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -29,7 +29,7 @@ mutable struct SQLQuery
     athena_params::Any
     limit::String
     ch_settings::String
-    
+
     function SQLQuery(;select::String="", from::String="", where::String="", groupBy::String="", orderBy::String="", having::String="",
         window_order::String="", windowFrame::String="", is_aggregated::Bool=false, post_aggregation::Bool=false, metadata::DataFrame=DataFrame(),
         distinct::Bool=false, db::Any=nothing, ctes::Vector{CTE}=Vector{CTE}(), cte_count::Int=0, athena_params::Any=nothing, limit::String="",
@@ -96,7 +96,9 @@ function from_query(query::TidierDB.SQLQuery)
         db=query.db,
         ctes=[copy(cte) for cte in query.ctes],
         cte_count=query.cte_count,
-        athena_params = query.athena_params
+        athena_params = query.athena_params,
+        limit = query.limit,
+        ch_settings = query.ch_settings
     )
     return new_query
 end
\ No newline at end of file

From fb1df4758a6883b71de29a91770bfa9e6d66b881 Mon Sep 17 00:00:00 2001
From: drizk1
Date: Sun, 11 Aug 2024 13:27:04 -0400
Subject: [PATCH 5/5] tiny doc tweak

---
 docs/examples/UserGuide/from_queryex.jl        |  2 +-
 .../UserGuide/functions_pass_to_DB.jl          |  1 +
 docs/examples/UserGuide/getting_started.jl     |  3 ++
 src/docstrings.jl                              | 33 ++++++++-----------
 4 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/docs/examples/UserGuide/from_queryex.jl b/docs/examples/UserGuide/from_queryex.jl
index 9ab67db..8a39597 100644
--- a/docs/examples/UserGuide/from_queryex.jl
+++ b/docs/examples/UserGuide/from_queryex.jl
@@ -61,7 +61,7 @@
 #   3 │ Hornet 4 Drive        6    21.4
 # ```
 
-# ## Preview an intermediate table
+# ## Preview or save an intermediate table
 # While querying a dataset, you may wish to see an intermediate table, or even save it. You can use `@aside` and `from_query(_)`, illustrated below, to do just that.
 # While we opted to print the results in this simple example below, we could have saved them by using `name = DB.@chain...`
 
diff --git a/docs/examples/UserGuide/functions_pass_to_DB.jl b/docs/examples/UserGuide/functions_pass_to_DB.jl
index d1940c9..e204de0 100644
--- a/docs/examples/UserGuide/functions_pass_to_DB.jl
+++ b/docs/examples/UserGuide/functions_pass_to_DB.jl
@@ -10,6 +10,7 @@
 # ```
 # using TidierDB
+# db = connect(duckdb())
 # path = "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv"
 # copy_to(db, path, "mtcars");
 #
diff --git a/docs/examples/UserGuide/getting_started.jl b/docs/examples/UserGuide/getting_started.jl
index 1376b33..94d432d 100644
--- a/docs/examples/UserGuide/getting_started.jl
+++ b/docs/examples/UserGuide/getting_started.jl
@@ -9,6 +9,7 @@
 
 # Alternatively, `using Tidier` will import TidierDB in the above manner for you, where TidierDB functions and macros will be available as `DB.@mutate()` and so on, and the TidierData equivalent would be `@mutate()`.
 
+# ## Connecting
 # To connect to a database, you can use the `connect` function as shown below, or establish your own connection through the respective libraries.
 # For example
 # Connecting to MySQL
 # ```julia
 # conn = DB.connect(DB.mysql(); host="localhost", user="root", password="password", db="mydb")
 # ```
@@ -21,6 +22,8 @@
 # conn = DB.connect(DB.duckdb())
 # ```
 
+# You can also establish a connection through an alternate method that you prefer, and use that as your connection as well.
+
 # ## Package Extensions
 # The following backends utilize package extensions. To use one of the backends listed below, you will need to write `using Library`
diff --git a/src/docstrings.jl b/src/docstrings.jl
index d41f7d1..556556e 100644
--- a/src/docstrings.jl
+++ b/src/docstrings.jl
@@ -1053,26 +1053,19 @@ const docstring_db_table =
 name, it will not copy them into memory, but rather read directly from the file.
 
 # Arguments
-`database`: The Database or connection object
-`table_name`: tablename as a string. Table name can be a name of a table on the database or paths to the following types
- --CSV
-
- --Parquet
-
- --Json
-
- --Iceberg
-
- --Delta
-
- --S3 tables from AWS or Google Cloud
-
- --vector of CSV or Parquet paths to read multiple at once
-DuckDB and ClickHouse support vectors of paths and URLs.
-DuckDB and ClickHouse also support use of `*` wildcards to read all files of a type in a location such as:
-`db_table(db, "Path/to/testing_files/*.parquet")`
-`delta`: must be true to read delta files
-`iceberg`: must be true to read iceberg finalize_ctes
+- `database`: The Database or connection object
+- `table_name`: table name as a string (name, local path, or URL).
+   - CSV/TSV
+   - Parquet
+   - Json
+   - Iceberg
+   - Delta
+   - S3 tables from AWS or Google Cloud
+   - DuckDB and ClickHouse support vectors of paths and URLs.
+   - DuckDB and ClickHouse also support use of `*` wildcards to read all files of a type in a location such as:
+- `db_table(db, "Path/to/testing_files/*.parquet")`
+- `delta`: must be true to read delta files
+- `iceberg`: must be true to read iceberg

 # Example
 ```julia
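 # A minimal usage sketch (illustrative only; this is not the file's verbatim
 # doctest). It assumes a DuckDB connection and uses only calls shown elsewhere
 # in these docs; "path/to/mtcars.csv" is a hypothetical local path.
 db = connect(duckdb())
 copy_to(db, "path/to/mtcars.csv", "mtcars")   # copy a local file in as a table
 db_table(db, "mtcars")                        # start a lazy query from a table on the database
 db_table(db, "folder/path/*.csv")             # or read files directly, without copying into memory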