Skip to content

Commit 9223cf0

Browse files
authored
Merge branch 'main' into spark_abs
2 parents 832a6ed + c1965b6 commit 9223cf0

File tree

89 files changed

+1214
-470
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

89 files changed

+1214
-470
lines changed

.github/workflows/audit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ jobs:
4242
steps:
4343
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
4444
- name: Install cargo-audit
45-
uses: taiki-e/install-action@f535147c22906d77695e11cb199e764aa610a4fc # v2.62.46
45+
uses: taiki-e/install-action@6f9c7cc51aa54b13cbcbd12f8bbf69d8ba405b4b # v2.62.47
4646
with:
4747
tool: cargo-audit
4848
- name: Run audit check

.github/workflows/rust.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ jobs:
434434
sudo apt-get update -qq
435435
sudo apt-get install -y -qq clang
436436
- name: Setup wasm-pack
437-
uses: taiki-e/install-action@f535147c22906d77695e11cb199e764aa610a4fc # v2.62.46
437+
uses: taiki-e/install-action@6f9c7cc51aa54b13cbcbd12f8bbf69d8ba405b4b # v2.62.47
438438
with:
439439
tool: wasm-pack
440440
- name: Run tests with headless mode
@@ -761,7 +761,7 @@ jobs:
761761
- name: Setup Rust toolchain
762762
uses: ./.github/actions/setup-builder
763763
- name: Install cargo-msrv
764-
uses: taiki-e/install-action@f535147c22906d77695e11cb199e764aa610a4fc # v2.62.46
764+
uses: taiki-e/install-action@6f9c7cc51aa54b13cbcbd12f8bbf69d8ba405b4b # v2.62.47
765765
with:
766766
tool: cargo-msrv
767767

benchmarks/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ repository = { workspace = true }
2626
license = { workspace = true }
2727
rust-version = { workspace = true }
2828

29+
# Note: add additional linter rules in lib.rs.
30+
# Rust does not support workspace + new linter rules in subcrates yet
31+
# https://github.com/rust-lang/cargo/issues/13157
2932
[lints]
3033
workspace = true
3134

datafusion-examples/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ license = { workspace = true }
2929
authors = { workspace = true }
3030
rust-version = { workspace = true }
3131

32+
# Note: add additional linter rules in lib.rs.
33+
# Rust does not support workspace + new linter rules in subcrates yet
34+
# https://github.com/rust-lang/cargo/issues/13157
3235
[lints]
3336
workspace = true
3437

datafusion-examples/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,13 @@ cargo run --example dataframe
6161
- [`custom_file_format.rs`](examples/custom_file_format.rs): Write data to a custom file format
6262
- [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3
6363
- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data, including multiple subqueries. Also demonstrates the various methods to write out a DataFrame to a table, parquet file, csv file, and json file.
64+
- [`examples/builtin_functions/date_time.rs`](examples/builtin_functions/date_time.rs): Examples of date-time related functions and queries
6465
- [`default_column_values.rs`](examples/default_column_values.rs): Implement custom default value handling for missing columns using field metadata and PhysicalExprAdapter
6566
- [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results (Arrow ArrayRefs) into Rust structs
6667
- [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s
6768
- [`file_stream_provider.rs`](examples/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks.
6869
- [`flight/sql_server.rs`](examples/flight/sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from Flight and FlightSQL (e.g. JDBC) clients
69-
- [`function_factory.rs`](examples/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros
70+
- [`examples/builtin_functions/function_factory.rs`](examples/builtin_functions/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros
7071
- [`memory_pool_tracking.rs`](examples/memory_pool_tracking.rs): Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages
7172
- [`memory_pool_execution_plan.rs`](examples/memory_pool_execution_plan.rs): Shows how to implement memory-aware ExecutionPlan with memory reservation and spilling
7273
- [`optimizer_rule.rs`](examples/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates
@@ -81,7 +82,7 @@ cargo run --example dataframe
8182
- [`pruning.rs`](examples/pruning.rs): Use pruning to rule out files based on statistics
8283
- [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3
8384
- [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files via HTTP
84-
- [`regexp.rs`](examples/regexp.rs): Examples of using regular expression functions
85+
- [`examples/builtin_functions/regexp.rs`](examples/builtin_functions/regexp.rs): Examples of using regular expression functions
8586
- [`remote_catalog.rs`](examples/remote_catalog.rs): Examples of interfacing with a remote catalog (e.g. over a network)
8687
- [`examples/udf/simple_udaf.rs`](examples/udf/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF)
8788
- [`examples/udf/simple_udf.rs`](examples/udf/simple_udf.rs): Define and invoke a User Defined Scalar Function (UDF)
@@ -91,7 +92,6 @@ cargo run --example dataframe
9192
- [`sql_frontend.rs`](examples/sql_frontend.rs): Create LogicalPlans (only) from sql strings
9293
- [`sql_dialect.rs`](examples/sql_dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser`
9394
- [`sql_query.rs`](examples/sql_query.rs): Query data using SQL (in memory `RecordBatches`, local Parquet files)
94-
- [`date_time_function.rs`](examples/date_time_function.rs): Examples of date-time related functions and queries.
9595

9696
## Distributed
9797

datafusion-examples/examples/advanced_parquet_index.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ use url::Url;
121121
/// │ ╚═══════════════════╝ │ 1. With cached ParquetMetadata, so
122122
/// └───────────────────────┘ the ParquetSource does not re-read /
123123
/// Parquet File decode the thrift footer
124-
///
125124
/// ```
126125
///
127126
/// Within a Row Group, Column Chunks store data in DataPages. This example also

datafusion-examples/examples/date_time_functions.rs renamed to datafusion-examples/examples/builtin_functions/date_time.rs

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,20 @@ use datafusion::common::assert_contains;
2626
use datafusion::error::Result;
2727
use datafusion::prelude::*;
2828

29-
#[tokio::main]
30-
async fn main() -> Result<()> {
29+
/// Example: Working with Date and Time Functions
30+
///
31+
/// This example demonstrates how to work with various date and time
32+
/// functions in DataFusion using both the DataFrame API and SQL queries.
33+
///
34+
/// It includes:
35+
/// - `make_date`: building `DATE` values from year, month, and day columns
36+
/// - `to_date`: converting string expressions into `DATE` values
37+
/// - `to_timestamp`: parsing strings or numeric values into `TIMESTAMP`s
38+
/// - `to_char`: formatting dates, timestamps, and durations as strings
39+
///
40+
/// Together, these examples show how to create, convert, and format temporal
41+
/// data using DataFusion’s built-in functions.
42+
pub async fn date_time() -> Result<()> {
3143
query_make_date().await?;
3244
query_to_date().await?;
3345
query_to_timestamp().await?;

datafusion-examples/examples/function_factory.rs renamed to datafusion-examples/examples/builtin_functions/function_factory.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ use std::sync::Arc;
4242
///
4343
/// This example is rather simple and does not cover all cases required for a
4444
/// real implementation.
45-
#[tokio::main]
46-
async fn main() -> Result<()> {
45+
pub async fn function_factory() -> Result<()> {
4746
// First we must configure the SessionContext with our function factory
4847
let ctx = SessionContext::new()
4948
// register custom function factory
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! # These are miscellaneous function-related examples
19+
//!
20+
//! These examples demonstrate miscellaneous function-related features.
21+
//!
22+
//! Each subcommand runs a corresponding example:
23+
//! - `date_time` — examples of date-time related functions and queries
24+
//! - `function_factory` — register `CREATE FUNCTION` handler to implement SQL macros
25+
//! - `regexp` — examples of using regular expression functions
26+
27+
mod date_time;
28+
mod function_factory;
29+
mod regexp;
30+
31+
use std::str::FromStr;
32+
33+
use datafusion::error::{DataFusionError, Result};
34+
35+
enum ExampleKind {
36+
DateTime,
37+
FunctionFactory,
38+
Regexp,
39+
}
40+
41+
impl AsRef<str> for ExampleKind {
42+
fn as_ref(&self) -> &str {
43+
match self {
44+
Self::DateTime => "date_time",
45+
Self::FunctionFactory => "function_factory",
46+
Self::Regexp => "regexp",
47+
}
48+
}
49+
}
50+
51+
impl FromStr for ExampleKind {
52+
type Err = DataFusionError;
53+
54+
fn from_str(s: &str) -> Result<Self> {
55+
match s {
56+
"date_time" => Ok(Self::DateTime),
57+
"function_factory" => Ok(Self::FunctionFactory),
58+
"regexp" => Ok(Self::Regexp),
59+
_ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))),
60+
}
61+
}
62+
}
63+
64+
impl ExampleKind {
65+
const ALL: [Self; 3] = [Self::DateTime, Self::FunctionFactory, Self::Regexp];
66+
67+
const EXAMPLE_NAME: &str = "builtin_functions";
68+
69+
fn variants() -> Vec<&'static str> {
70+
Self::ALL.iter().map(|x| x.as_ref()).collect()
71+
}
72+
}
73+
74+
#[tokio::main]
75+
async fn main() -> Result<()> {
76+
let usage = format!(
77+
"Usage: cargo run --example {} -- [{}]",
78+
ExampleKind::EXAMPLE_NAME,
79+
ExampleKind::variants().join("|")
80+
);
81+
82+
let arg = std::env::args().nth(1).ok_or_else(|| {
83+
eprintln!("{usage}");
84+
DataFusionError::Execution("Missing argument".to_string())
85+
})?;
86+
87+
match arg.parse::<ExampleKind>()? {
88+
ExampleKind::DateTime => date_time::date_time().await?,
89+
ExampleKind::FunctionFactory => function_factory::function_factory().await?,
90+
ExampleKind::Regexp => regexp::regexp().await?,
91+
}
92+
93+
Ok(())
94+
}

datafusion-examples/examples/regexp.rs renamed to datafusion-examples/examples/builtin_functions/regexp.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,11 @@ use datafusion::prelude::*;
2828
///
2929
/// Supported flags can be found at
3030
/// https://docs.rs/regex/latest/regex/#grouping-and-flags
31-
#[tokio::main]
32-
async fn main() -> Result<()> {
31+
pub async fn regexp() -> Result<()> {
3332
let ctx = SessionContext::new();
3433
ctx.register_csv(
3534
"examples",
36-
"../../datafusion/physical-expr/tests/data/regex.csv",
35+
"datafusion/physical-expr/tests/data/regex.csv",
3736
CsvReadOptions::new(),
3837
)
3938
.await?;

0 commit comments

Comments
 (0)