Skip to content

Change way to generate implemented functions #1064

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ dev = "run -- --backend=memory --cors-allow-origin=http://localhost:8080 --cors-
auth_demo = "run -- --jwt-secret=test --backend=memory --cors-allow-origin=http://localhost:8080 --cors-enabled=true --tracing-level=debug"
lint = "clippy --all-targets --workspace"
embucket-seed = "run -p embucket-seed -- --server-address 127.0.0.1:3000 --auth-user embucket --auth-password embucket --seed-variant typical"
update-functions = "run -p embucket-functions generate-implemented-functions"
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions crates/embucket-functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,16 @@ indexmap = "2.9.0"
strsim = "0.11"
tracing = "0.1.41"
tokio = { workspace = true }
datafusion-functions-json = { workspace = true }

[dev-dependencies]
bytes = { workspace = true }
insta = { version = "1.42.0", features = ["yaml", "filters"] }
paste = "1"

[[bin]]
name = "generate-implemented-functions"
path = "src/bin/generate_implemented_functions.rs"

[lints]
workspace = true
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ After implementing the function, it **MUST** be registered properly:
Add the function to the appropriate function registry module. The easiest way is to register it through lib.rs with updating `register_udfs` function.

### 2. Add to Tracking (REQUIRED)
Add the function name to `implemented_functions.csv`:
Run `cargo update-functions` to regenerate the list of registered functions.

### 3. Regenerate Unimplemented Functions with Documentation (RECOMMENDED)
```bash
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
use std::collections::BTreeSet;
use std::fs;
use std::path::PathBuf;

use core_history::store::SlateDBHistoryStore;
use datafusion::prelude::SessionContext;
use embucket_functions::table::register_udtfs;
use embucket_functions::{register_udafs, register_udfs};

/// Locates the project root directory.
///
/// Starting from the current working directory, each ancestor is inspected in
/// turn (nearest first) and the first one that contains a `crates` directory is
/// returned.
///
/// # Errors
///
/// Returns an error if the current working directory cannot be determined, or
/// if no ancestor contains a `crates` directory.
fn find_project_root() -> Result<PathBuf, Box<dyn std::error::Error>> {
    let start_dir = std::env::current_dir()?;

    start_dir
        .ancestors()
        // `is_dir` is already false for paths that do not exist.
        .find(|candidate| candidate.join("crates").is_dir())
        .map(PathBuf::from)
        .ok_or_else(|| "Could not find project root with crates folder".into())
}

/// Generates the `implemented_functions.csv` file by extracting all function names
/// from a fully configured `DataFusion` `SessionContext`.
pub async fn generate_implemented_functions_csv() -> Result<(), Box<dyn std::error::Error>> {
eprintln!("Generating implemented_functions.csv...");

// Find project root and construct the CSV path
let project_root = find_project_root()?;
let csv_path = project_root
.join("crates")
.join("embucket-functions")
.join("src")
.join("visitors")
.join("unimplemented")
.join("helper")
.join("implemented_functions.csv");

// Create a SessionContext and register all the functions like in session.rs
let mut ctx = SessionContext::new();

register_udfs(&mut ctx)?;
register_udafs(&mut ctx)?;

let history_store = SlateDBHistoryStore::new_in_memory().await;
register_udtfs(&ctx, history_store);

datafusion_functions_json::register_all(&mut ctx)?;

let state = ctx.state();

let all_functions: BTreeSet<_> = state
.scalar_functions()
.keys()
.chain(state.aggregate_functions().keys())
.chain(state.window_functions().keys())
.chain(state.table_functions().keys())
.cloned()
.collect();

// Create the CSV content
let mut csv_content = String::new();
csv_content.push_str("IMPLEMENTED_FUNCTIONS\n");

let function_count = all_functions.len();
for function_name in all_functions {
csv_content.push_str(&function_name);
csv_content.push('\n');
}

// Ensure the directory exists
if let Some(parent) = csv_path.parent() {
fs::create_dir_all(parent)?;
}

// Write to the helper directory
fs::write(&csv_path, csv_content)?;

eprintln!("✅ Generated implemented_functions.csv with {function_count} functions");
eprintln!("📁 File location: {}", csv_path.display());

Ok(())
}

/// Binary entry point: regenerates the CSV and, on failure, prints the error
/// to stderr and exits with status 1.
#[tokio::main]
async fn main() {
    match generate_implemented_functions_csv().await {
        Ok(()) => {}
        Err(error) => {
            eprintln!("Error: {error}");
            std::process::exit(1);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -591,13 +591,6 @@ pub const CONVERSION_FUNCTIONS: &[(&str, FunctionInfo)] = &[
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/st_geometryfromwkt")
.with_subcategory("geospatial")
),
("TO_ARRAY", FunctionInfo::new(
"TO_ARRAY",
"Converts the input expression to an ARRAY."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/to_array")
.with_subcategory("semi-structured")
),
("TO_BINARY", FunctionInfo::new(
"TO_BINARY",
"Converts the input expression to a binary value."
Expand Down Expand Up @@ -2070,13 +2063,6 @@ pub const SEMISTRUCTURED_FUNCTIONS: &[(&str, FunctionInfo)] = &[
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/array_construct_compact")
.with_subcategory("array")
),
("AS_ARRAY", FunctionInfo::new(
"AS_ARRAY",
"Casts a VARIANT value to an ARRAY value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/as_array")
.with_subcategory("conversion")
),
("AS_BINARY", FunctionInfo::new(
"AS_BINARY",
"Casts a VARIANT value to a BINARY value."
Expand Down Expand Up @@ -2133,13 +2119,6 @@ pub const SEMISTRUCTURED_FUNCTIONS: &[(&str, FunctionInfo)] = &[
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/as_decimal-number")
.with_subcategory("conversion")
),
("AS_OBJECT", FunctionInfo::new(
"AS_OBJECT",
"Casts a VARIANT value to an OBJECT value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/as_object")
.with_subcategory("conversion")
),
("AS_REAL", FunctionInfo::new(
"AS_REAL",
"Casts a VARIANT value to a floating-point value."
Expand Down Expand Up @@ -2207,24 +2186,12 @@ pub const SEMISTRUCTURED_FUNCTIONS: &[(&str, FunctionInfo)] = &[
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/filter")
),
("GET_IGNORE_CASE", FunctionInfo::new(
"GET_IGNORE_CASE",
"Extracts a field value from an object; returns NULL if either of the arguments is NULL."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/get_ignore_case")
),
("IS_BINARY", FunctionInfo::new(
"IS_BINARY",
"Returns TRUE if its VARIANT argument contains a binary string value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_binary")
),
("IS_BOOLEAN", FunctionInfo::new(
"IS_BOOLEAN",
"Returns TRUE if its VARIANT argument contains a BOOLEAN value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_boolean")
),
("IS_CHAR", FunctionInfo::new(
"IS_CHAR",
"Returns TRUE if its VARIANT argument contains a string value."
Expand All @@ -2249,31 +2216,6 @@ pub const SEMISTRUCTURED_FUNCTIONS: &[(&str, FunctionInfo)] = &[
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_decimal")
),
("IS_DOUBLE", FunctionInfo::new(
"IS_DOUBLE",
"Returns TRUE if its VARIANT argument contains a floating-point number, fixed-point number, or integer value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_double-real")
),
("IS_INTEGER", FunctionInfo::new(
"IS_INTEGER",
"Returns TRUE if its VARIANT argument contains an integer value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_integer")
),
("IS_NULL_VALUE", FunctionInfo::new(
"IS_NULL_VALUE",
"Returns TRUE if its VARIANT argument is a JSON null value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_null_value")
.with_subcategory("json")
),
("IS_REAL", FunctionInfo::new(
"IS_REAL",
"Returns TRUE if its VARIANT argument contains a floating-point number, fixed-point number, or integer value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_double-real")
),
("IS_TIME", FunctionInfo::new(
"IS_TIME",
"Verifies whether a VARIANT argument contains a TIME value."
Expand All @@ -2298,18 +2240,6 @@ pub const SEMISTRUCTURED_FUNCTIONS: &[(&str, FunctionInfo)] = &[
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_timestamp")
),
("IS_VARCHAR", FunctionInfo::new(
"IS_VARCHAR",
"Returns TRUE if its VARIANT argument contains a string value."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/is_char-varchar")
),
("JSON_EXTRACT_PATH_TEXT", FunctionInfo::new(
"JSON_EXTRACT_PATH_TEXT",
"Parses the first argument as a JSON string and returns the value of the element pointed to by the path in the second argument."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/json_extract_path_text")
),
("MAP_CAT", FunctionInfo::new(
"MAP_CAT",
"Returns the concatenatation of two MAPs."
Expand Down Expand Up @@ -2352,13 +2282,6 @@ pub const SEMISTRUCTURED_FUNCTIONS: &[(&str, FunctionInfo)] = &[
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/map_size")
.with_subcategory("map")
),
("OBJECT_CONSTRUCT_KEEP_NULL", FunctionInfo::new(
"OBJECT_CONSTRUCT_KEEP_NULL",
"Returns an OBJECT constructed from the arguments that retains key-values pairs with NULL values."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/object_construct_keep_null")
.with_subcategory("object")
),
("PARSE_XML", FunctionInfo::new(
"PARSE_XML",
"Interprets an input string as an XML document, producing an OBJECT value."
Expand Down Expand Up @@ -2391,12 +2314,6 @@ pub const SEMISTRUCTURED_FUNCTIONS: &[(&str, FunctionInfo)] = &[
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/transform")
),
("TYPEOF", FunctionInfo::new(
"TYPEOF",
"Returns the type of a value stored in a VARIANT column."
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/typeof")
),
("XMLGET", FunctionInfo::new(
"XMLGET",
"Extracts an XML element object (often referred to as simply a tag) from the content of the outer XML element based on the name and instance number of the specified tag."
Expand Down Expand Up @@ -3525,7 +3442,7 @@ pub const TABLE_FUNCTIONS: &[(&str, FunctionInfo)] = &[
)
.with_docs("https://docs.snowflake.com/en/sql-reference/functions/rest_event_history")
),
("SPLIT_TO_TABLE", FunctionInfo::new(
("SPLIT_TO_TABLE", FunctionInfo::new(
"SPLIT_TO_TABLE",
"This table function splits a string (based on a specified delimiter) and flattens the results into rows."
)
Expand Down
Loading