Skip to content

Commit 963e2cc

Browse files
committed
Merge remote-tracking branch 'apache/main' into csv-exec-builder
2 parents debfd19 + bcf715c commit 963e2cc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+1324
-557
lines changed

.github/workflows/rust.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ jobs:
9090
9191
# Ensure that the datafusion crate can be built with only a subset of the function
9292
# packages enabled.
93-
- name: Check datafusion (array_expressions)
94-
run: cargo check --no-default-features --features=array_expressions -p datafusion
93+
- name: Check datafusion (nested_expressions)
94+
run: cargo check --no-default-features --features=nested_expressions -p datafusion
9595

9696
- name: Check datafusion (crypto)
9797
run: cargo check --no-default-features --features=crypto_expressions -p datafusion

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ members = [
2525
"datafusion/execution",
2626
"datafusion/functions-aggregate",
2727
"datafusion/functions",
28-
"datafusion/functions-array",
28+
"datafusion/functions-nested",
2929
"datafusion/optimizer",
3030
"datafusion/physical-expr-common",
3131
"datafusion/physical-expr",
@@ -94,7 +94,7 @@ datafusion-execution = { path = "datafusion/execution", version = "40.0.0" }
9494
datafusion-expr = { path = "datafusion/expr", version = "40.0.0" }
9595
datafusion-functions = { path = "datafusion/functions", version = "40.0.0" }
9696
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "40.0.0" }
97-
datafusion-functions-array = { path = "datafusion/functions-array", version = "40.0.0" }
97+
datafusion-functions-nested = { path = "datafusion/functions-nested", version = "40.0.0" }
9898
datafusion-optimizer = { path = "datafusion/optimizer", version = "40.0.0", default-features = false }
9999
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "40.0.0", default-features = false }
100100
datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "40.0.0", default-features = false }

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ This crate has several [features] which can be specified in your `Cargo.toml`.
7575

7676
Default features:
7777

78-
- `array_expressions`: functions for working with arrays such as `array_to_string`
78+
- `nested_expressions`: functions for working with nested types such as `array_to_string`
7979
- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
8080
- `crypto_expressions`: cryptographic functions such as `md5` and `sha256`
8181
- `datetime_expressions`: date and time functions such as `to_timestamp`

datafusion-cli/Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
// Constants defined for scalar construction.
19+
20+
// PI ~ 3.1415927 in f32
21+
#[allow(clippy::approx_constant)]
22+
pub(super) const PI_UPPER_F32: f32 = 3.141593_f32;
23+
24+
// PI ~ 3.141592653589793 in f64
25+
pub(super) const PI_UPPER_F64: f64 = 3.141592653589794_f64;
26+
27+
// -PI ~ -3.1415927 in f32
28+
#[allow(clippy::approx_constant)]
29+
pub(super) const NEGATIVE_PI_LOWER_F32: f32 = -3.141593_f32;
30+
31+
// -PI ~ -3.141592653589793 in f64
32+
pub(super) const NEGATIVE_PI_LOWER_F64: f64 = -3.141592653589794_f64;
33+
34+
// PI / 2 ~ 1.5707964 in f32
35+
pub(super) const FRAC_PI_2_UPPER_F32: f32 = 1.5707965_f32;
36+
37+
// PI / 2 ~ 1.5707963267948966 in f64
38+
pub(super) const FRAC_PI_2_UPPER_F64: f64 = 1.5707963267948967_f64;
39+
40+
// -PI / 2 ~ -1.5707964 in f32
41+
pub(super) const NEGATIVE_FRAC_PI_2_LOWER_F32: f32 = -1.5707965_f32;
42+
43+
// -PI / 2 ~ -1.5707963267948966 in f64
44+
pub(super) const NEGATIVE_FRAC_PI_2_LOWER_F64: f64 = -1.5707963267948967_f64;

datafusion/common/src/scalar/mod.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717

1818
//! [`ScalarValue`]: stores single values
1919
20+
mod consts;
2021
mod struct_builder;
22+
2123
use std::borrow::Borrow;
2224
use std::cmp::Ordering;
2325
use std::collections::{HashSet, VecDeque};
@@ -1007,6 +1009,123 @@ impl ScalarValue {
10071009
}
10081010
}
10091011

1012+
/// Returns a [`ScalarValue`] representing PI
1013+
pub fn new_pi(datatype: &DataType) -> Result<ScalarValue> {
1014+
match datatype {
1015+
DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::PI)),
1016+
DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::PI)),
1017+
_ => _internal_err!("PI is not supported for data type: {:?}", datatype),
1018+
}
1019+
}
1020+
1021+
/// Returns a [`ScalarValue`] representing PI's upper bound
1022+
pub fn new_pi_upper(datatype: &DataType) -> Result<ScalarValue> {
1023+
// TODO: replace the constants with next_up/next_down when
1024+
// they are stabilized: https://doc.rust-lang.org/std/primitive.f64.html#method.next_up
1025+
match datatype {
1026+
DataType::Float32 => Ok(ScalarValue::from(consts::PI_UPPER_F32)),
1027+
DataType::Float64 => Ok(ScalarValue::from(consts::PI_UPPER_F64)),
1028+
_ => {
1029+
_internal_err!("PI_UPPER is not supported for data type: {:?}", datatype)
1030+
}
1031+
}
1032+
}
1033+
1034+
/// Returns a [`ScalarValue`] representing -PI's lower bound
1035+
pub fn new_negative_pi_lower(datatype: &DataType) -> Result<ScalarValue> {
1036+
match datatype {
1037+
DataType::Float32 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F32)),
1038+
DataType::Float64 => Ok(ScalarValue::from(consts::NEGATIVE_PI_LOWER_F64)),
1039+
_ => {
1040+
_internal_err!("-PI_LOWER is not supported for data type: {:?}", datatype)
1041+
}
1042+
}
1043+
}
1044+
1045+
/// Returns a [`ScalarValue`] representing FRAC_PI_2's upper bound
1046+
pub fn new_frac_pi_2_upper(datatype: &DataType) -> Result<ScalarValue> {
1047+
match datatype {
1048+
DataType::Float32 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F32)),
1049+
DataType::Float64 => Ok(ScalarValue::from(consts::FRAC_PI_2_UPPER_F64)),
1050+
_ => {
1051+
_internal_err!(
1052+
"PI_UPPER/2 is not supported for data type: {:?}",
1053+
datatype
1054+
)
1055+
}
1056+
}
1057+
}
1058+
1059+
/// Returns a [`ScalarValue`] representing -FRAC_PI_2's lower bound
1060+
pub fn new_neg_frac_pi_2_lower(datatype: &DataType) -> Result<ScalarValue> {
1061+
match datatype {
1062+
DataType::Float32 => {
1063+
Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F32))
1064+
}
1065+
DataType::Float64 => {
1066+
Ok(ScalarValue::from(consts::NEGATIVE_FRAC_PI_2_LOWER_F64))
1067+
}
1068+
_ => {
1069+
_internal_err!(
1070+
"-PI/2_LOWER is not supported for data type: {:?}",
1071+
datatype
1072+
)
1073+
}
1074+
}
1075+
}
1076+
1077+
/// Returns a [`ScalarValue`] representing -PI
1078+
pub fn new_negative_pi(datatype: &DataType) -> Result<ScalarValue> {
1079+
match datatype {
1080+
DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::PI)),
1081+
DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::PI)),
1082+
_ => _internal_err!("-PI is not supported for data type: {:?}", datatype),
1083+
}
1084+
}
1085+
1086+
/// Returns a [`ScalarValue`] representing PI/2
1087+
pub fn new_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1088+
match datatype {
1089+
DataType::Float32 => Ok(ScalarValue::from(std::f32::consts::FRAC_PI_2)),
1090+
DataType::Float64 => Ok(ScalarValue::from(std::f64::consts::FRAC_PI_2)),
1091+
_ => _internal_err!("PI/2 is not supported for data type: {:?}", datatype),
1092+
}
1093+
}
1094+
1095+
/// Returns a [`ScalarValue`] representing -PI/2
1096+
pub fn new_neg_frac_pi_2(datatype: &DataType) -> Result<ScalarValue> {
1097+
match datatype {
1098+
DataType::Float32 => Ok(ScalarValue::from(-std::f32::consts::FRAC_PI_2)),
1099+
DataType::Float64 => Ok(ScalarValue::from(-std::f64::consts::FRAC_PI_2)),
1100+
_ => _internal_err!("-PI/2 is not supported for data type: {:?}", datatype),
1101+
}
1102+
}
1103+
1104+
/// Returns a [`ScalarValue`] representing infinity
1105+
pub fn new_infinity(datatype: &DataType) -> Result<ScalarValue> {
1106+
match datatype {
1107+
DataType::Float32 => Ok(ScalarValue::from(f32::INFINITY)),
1108+
DataType::Float64 => Ok(ScalarValue::from(f64::INFINITY)),
1109+
_ => {
1110+
_internal_err!("Infinity is not supported for data type: {:?}", datatype)
1111+
}
1112+
}
1113+
}
1114+
1115+
/// Returns a [`ScalarValue`] representing negative infinity
1116+
pub fn new_neg_infinity(datatype: &DataType) -> Result<ScalarValue> {
1117+
match datatype {
1118+
DataType::Float32 => Ok(ScalarValue::from(f32::NEG_INFINITY)),
1119+
DataType::Float64 => Ok(ScalarValue::from(f64::NEG_INFINITY)),
1120+
_ => {
1121+
_internal_err!(
1122+
"Negative Infinity is not supported for data type: {:?}",
1123+
datatype
1124+
)
1125+
}
1126+
}
1127+
}
1128+
10101129
/// Create a zero value in the given type.
10111130
pub fn new_zero(datatype: &DataType) -> Result<ScalarValue> {
10121131
Ok(match datatype {

datafusion/core/Cargo.toml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,17 @@ name = "datafusion"
4040
path = "src/lib.rs"
4141

4242
[features]
43+
nested_expressions = ["datafusion-functions-nested"]
44+
# This feature is deprecated. Use the `nested_expressions` feature instead.
45+
array_expressions = ["nested_expressions"]
4346
# Used to enable the avro format
44-
array_expressions = ["datafusion-functions-array"]
4547
avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
4648
backtrace = ["datafusion-common/backtrace"]
4749
compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression", "tokio-util"]
4850
crypto_expressions = ["datafusion-functions/crypto_expressions"]
4951
datetime_expressions = ["datafusion-functions/datetime_expressions"]
5052
default = [
51-
"array_expressions",
53+
"nested_expressions",
5254
"crypto_expressions",
5355
"datetime_expressions",
5456
"encoding_expressions",
@@ -102,7 +104,7 @@ datafusion-execution = { workspace = true }
102104
datafusion-expr = { workspace = true }
103105
datafusion-functions = { workspace = true }
104106
datafusion-functions-aggregate = { workspace = true }
105-
datafusion-functions-array = { workspace = true, optional = true }
107+
datafusion-functions-nested = { workspace = true, optional = true }
106108
datafusion-optimizer = { workspace = true }
107109
datafusion-physical-expr = { workspace = true }
108110
datafusion-physical-expr-common = { workspace = true }
@@ -221,4 +223,4 @@ name = "parquet_statistic"
221223
[[bench]]
222224
harness = false
223225
name = "map_query_sql"
224-
required-features = ["array_expressions"]
226+
required-features = ["nested_expressions"]

datafusion/core/benches/map_query_sql.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use tokio::runtime::Runtime;
2727
use datafusion::prelude::SessionContext;
2828
use datafusion_common::ScalarValue;
2929
use datafusion_expr::Expr;
30-
use datafusion_functions_array::map::map;
30+
use datafusion_functions_nested::map::map;
3131

3232
mod data_utils;
3333

datafusion/core/src/execution/session_state_defaults.rs

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ use crate::datasource::file_format::parquet::ParquetFormatFactory;
2626
use crate::datasource::file_format::FileFormatFactory;
2727
use crate::datasource::provider::{DefaultTableFactory, TableProviderFactory};
2828
use crate::execution::context::SessionState;
29-
#[cfg(feature = "array_expressions")]
30-
use crate::functions_array;
29+
#[cfg(feature = "nested_expressions")]
30+
use crate::functions_nested;
3131
use crate::{functions, functions_aggregate};
3232
use datafusion_execution::config::SessionConfig;
3333
use datafusion_execution::object_store::ObjectStoreUrl;
@@ -82,11 +82,11 @@ impl SessionStateDefaults {
8282
pub fn default_expr_planners() -> Vec<Arc<dyn ExprPlanner>> {
8383
let expr_planners: Vec<Arc<dyn ExprPlanner>> = vec![
8484
Arc::new(functions::core::planner::CoreFunctionPlanner::default()),
85-
// register crate of array expressions (if enabled)
86-
#[cfg(feature = "array_expressions")]
87-
Arc::new(functions_array::planner::ArrayFunctionPlanner),
88-
#[cfg(feature = "array_expressions")]
89-
Arc::new(functions_array::planner::FieldAccessPlanner),
85+
// register crate of nested expressions (if enabled)
86+
#[cfg(feature = "nested_expressions")]
87+
Arc::new(functions_nested::planner::NestedFunctionPlanner),
88+
#[cfg(feature = "nested_expressions")]
89+
Arc::new(functions_nested::planner::FieldAccessPlanner),
9090
#[cfg(any(
9191
feature = "datetime_expressions",
9292
feature = "unicode_expressions"
@@ -100,8 +100,8 @@ impl SessionStateDefaults {
100100
/// returns the list of default [`ScalarUDF`]s
101101
pub fn default_scalar_functions() -> Vec<Arc<ScalarUDF>> {
102102
let mut functions: Vec<Arc<ScalarUDF>> = functions::all_default_functions();
103-
#[cfg(feature = "array_expressions")]
104-
functions.append(&mut functions_array::all_default_array_functions());
103+
#[cfg(feature = "nested_expressions")]
104+
functions.append(&mut functions_nested::all_default_nested_functions());
105105

106106
functions
107107
}
@@ -140,8 +140,9 @@ impl SessionStateDefaults {
140140
/// registers all the builtin array functions
141141
pub fn register_array_functions(state: &mut SessionState) {
142142
// register crate of nested expressions (if enabled)
143-
#[cfg(feature = "array_expressions")]
144-
functions_array::register_all(state).expect("can not register array expressions");
143+
#[cfg(feature = "nested_expressions")]
144+
functions_nested::register_all(state)
145+
.expect("can not register nested expressions");
145146
}
146147

147148
/// registers all the builtin aggregate functions

datafusion/core/src/lib.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@
458458
//! * [datafusion_execution]: State and structures needed for execution
459459
//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure
460460
//! * [datafusion_functions]: Scalar function packages
461-
//! * [datafusion_functions_array]: Scalar function packages for `ARRAY`s
461+
//! * [datafusion_functions_nested]: Scalar function packages for `ARRAY`s, `MAP`s and `STRUCT`s
462462
//! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
463463
//! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
464464
//! * [datafusion_physical_plan]: [`ExecutionPlan`] and related expressions
@@ -545,6 +545,11 @@ pub mod optimizer {
545545
pub use datafusion_optimizer::*;
546546
}
547547

548+
/// re-export of [`datafusion_physical_expr_common`] crate
549+
pub mod physical_expr_common {
550+
pub use datafusion_physical_expr_common::*;
551+
}
552+
548553
/// re-export of [`datafusion_physical_expr`] crate
549554
pub mod physical_expr {
550555
pub use datafusion_physical_expr::*;
@@ -569,10 +574,17 @@ pub mod functions {
569574
pub use datafusion_functions::*;
570575
}
571576

572-
/// re-export of [`datafusion_functions_array`] crate, if "array_expressions" feature is enabled
577+
/// re-export of [`datafusion_functions_nested`] crate, if "nested_expressions" feature is enabled
578+
pub mod functions_nested {
579+
#[cfg(feature = "nested_expressions")]
580+
pub use datafusion_functions_nested::*;
581+
}
582+
583+
/// re-export of [`datafusion_functions_nested`] crate as [`functions_array`] for backward compatibility, if "nested_expressions" feature is enabled
584+
#[deprecated(since = "41.0.0", note = "use datafusion-functions-nested instead")]
573585
pub mod functions_array {
574-
#[cfg(feature = "array_expressions")]
575-
pub use datafusion_functions_array::*;
586+
#[cfg(feature = "nested_expressions")]
587+
pub use datafusion_functions_nested::*;
576588
}
577589

578590
/// re-export of [`datafusion_functions_aggregate`] crate

0 commit comments

Comments
 (0)