Skip to content

Commit a39a223

Browse files
authored
add datafusion-expr module (#1759)
1 parent 2e535f9 commit a39a223

File tree

9 files changed

+390
-256
lines changed

9 files changed

+390
-256
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
members = [
2020
"datafusion",
2121
"datafusion-common",
22+
"datafusion-expr",
2223
"datafusion-cli",
2324
"datafusion-examples",
2425
"benchmarks",

datafusion-expr/Cargo.toml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-expr"
20+
description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
21+
version = "6.0.0"
22+
homepage = "https://github.com/apache/arrow-datafusion"
23+
repository = "https://github.com/apache/arrow-datafusion"
24+
readme = "../README.md"
25+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
26+
license = "Apache-2.0"
27+
keywords = [ "arrow", "query", "sql" ]
28+
edition = "2021"
29+
rust-version = "1.58"
30+
31+
[lib]
32+
name = "datafusion_expr"
33+
path = "src/lib.rs"
34+
35+
[features]
36+
37+
[dependencies]
38+
datafusion-common = { path = "../datafusion-common", version = "6.0.0" }
39+
arrow = { version = "8.0.0", features = ["prettyprint"] }

datafusion-expr/README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<!---
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# DataFusion Expr
21+
22+
This is an internal module for fundamental expression types of [DataFusion][df].
23+
24+
[df]: https://crates.io/crates/datafusion
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use datafusion_common::{DataFusionError, Result};
19+
use std::{fmt, str::FromStr};
20+
21+
/// Enum of all built-in aggregate functions
22+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
23+
pub enum AggregateFunction {
24+
/// count
25+
Count,
26+
/// sum
27+
Sum,
28+
/// min
29+
Min,
30+
/// max
31+
Max,
32+
/// avg
33+
Avg,
34+
/// Approximate aggregate function
35+
ApproxDistinct,
36+
/// array_agg
37+
ArrayAgg,
38+
/// Variance (Sample)
39+
Variance,
40+
/// Variance (Population)
41+
VariancePop,
42+
/// Standard Deviation (Sample)
43+
Stddev,
44+
/// Standard Deviation (Population)
45+
StddevPop,
46+
/// Covariance (Sample)
47+
Covariance,
48+
/// Covariance (Population)
49+
CovariancePop,
50+
/// Correlation
51+
Correlation,
52+
/// Approximate continuous percentile function
53+
ApproxPercentileCont,
54+
}
55+
56+
impl fmt::Display for AggregateFunction {
57+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
58+
// uppercase of the debug.
59+
write!(f, "{}", format!("{:?}", self).to_uppercase())
60+
}
61+
}
62+
63+
impl FromStr for AggregateFunction {
64+
type Err = DataFusionError;
65+
fn from_str(name: &str) -> Result<AggregateFunction> {
66+
Ok(match name {
67+
"min" => AggregateFunction::Min,
68+
"max" => AggregateFunction::Max,
69+
"count" => AggregateFunction::Count,
70+
"avg" => AggregateFunction::Avg,
71+
"sum" => AggregateFunction::Sum,
72+
"approx_distinct" => AggregateFunction::ApproxDistinct,
73+
"array_agg" => AggregateFunction::ArrayAgg,
74+
"var" => AggregateFunction::Variance,
75+
"var_samp" => AggregateFunction::Variance,
76+
"var_pop" => AggregateFunction::VariancePop,
77+
"stddev" => AggregateFunction::Stddev,
78+
"stddev_samp" => AggregateFunction::Stddev,
79+
"stddev_pop" => AggregateFunction::StddevPop,
80+
"covar" => AggregateFunction::Covariance,
81+
"covar_samp" => AggregateFunction::Covariance,
82+
"covar_pop" => AggregateFunction::CovariancePop,
83+
"corr" => AggregateFunction::Correlation,
84+
"approx_percentile_cont" => AggregateFunction::ApproxPercentileCont,
85+
_ => {
86+
return Err(DataFusionError::Plan(format!(
87+
"There is no built-in function named {}",
88+
name
89+
)));
90+
}
91+
})
92+
}
93+
}

datafusion-expr/src/lib.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
mod aggregate_function;
19+
mod window_function;
20+
21+
pub use aggregate_function::AggregateFunction;
22+
pub use window_function::{BuiltInWindowFunction, WindowFunction};
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::aggregate_function::AggregateFunction;
19+
use datafusion_common::{DataFusionError, Result};
20+
use std::{fmt, str::FromStr};
21+
22+
/// WindowFunction
23+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
24+
pub enum WindowFunction {
25+
/// window function that leverages an aggregate function
26+
AggregateFunction(AggregateFunction),
27+
/// window function that leverages a built-in window function
28+
BuiltInWindowFunction(BuiltInWindowFunction),
29+
}
30+
31+
impl FromStr for WindowFunction {
32+
type Err = DataFusionError;
33+
fn from_str(name: &str) -> Result<WindowFunction> {
34+
let name = name.to_lowercase();
35+
if let Ok(aggregate) = AggregateFunction::from_str(name.as_str()) {
36+
Ok(WindowFunction::AggregateFunction(aggregate))
37+
} else if let Ok(built_in_function) =
38+
BuiltInWindowFunction::from_str(name.as_str())
39+
{
40+
Ok(WindowFunction::BuiltInWindowFunction(built_in_function))
41+
} else {
42+
Err(DataFusionError::Plan(format!(
43+
"There is no window function named {}",
44+
name
45+
)))
46+
}
47+
}
48+
}
49+
50+
impl fmt::Display for BuiltInWindowFunction {
51+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
52+
match self {
53+
BuiltInWindowFunction::RowNumber => write!(f, "ROW_NUMBER"),
54+
BuiltInWindowFunction::Rank => write!(f, "RANK"),
55+
BuiltInWindowFunction::DenseRank => write!(f, "DENSE_RANK"),
56+
BuiltInWindowFunction::PercentRank => write!(f, "PERCENT_RANK"),
57+
BuiltInWindowFunction::CumeDist => write!(f, "CUME_DIST"),
58+
BuiltInWindowFunction::Ntile => write!(f, "NTILE"),
59+
BuiltInWindowFunction::Lag => write!(f, "LAG"),
60+
BuiltInWindowFunction::Lead => write!(f, "LEAD"),
61+
BuiltInWindowFunction::FirstValue => write!(f, "FIRST_VALUE"),
62+
BuiltInWindowFunction::LastValue => write!(f, "LAST_VALUE"),
63+
BuiltInWindowFunction::NthValue => write!(f, "NTH_VALUE"),
64+
}
65+
}
66+
}
67+
68+
impl fmt::Display for WindowFunction {
69+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
70+
match self {
71+
WindowFunction::AggregateFunction(fun) => fun.fmt(f),
72+
WindowFunction::BuiltInWindowFunction(fun) => fun.fmt(f),
73+
}
74+
}
75+
}
76+
77+
/// An aggregate function that is part of a built-in window function
78+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
79+
pub enum BuiltInWindowFunction {
80+
/// number of the current row within its partition, counting from 1
81+
RowNumber,
82+
/// rank of the current row with gaps; same as row_number of its first peer
83+
Rank,
84+
/// ank of the current row without gaps; this function counts peer groups
85+
DenseRank,
86+
/// relative rank of the current row: (rank - 1) / (total rows - 1)
87+
PercentRank,
88+
/// relative rank of the current row: (number of rows preceding or peer with current row) / (total rows)
89+
CumeDist,
90+
/// integer ranging from 1 to the argument value, dividing the partition as equally as possible
91+
Ntile,
92+
/// returns value evaluated at the row that is offset rows before the current row within the partition;
93+
/// if there is no such row, instead return default (which must be of the same type as value).
94+
/// Both offset and default are evaluated with respect to the current row.
95+
/// If omitted, offset defaults to 1 and default to null
96+
Lag,
97+
/// returns value evaluated at the row that is offset rows after the current row within the partition;
98+
/// if there is no such row, instead return default (which must be of the same type as value).
99+
/// Both offset and default are evaluated with respect to the current row.
100+
/// If omitted, offset defaults to 1 and default to null
101+
Lead,
102+
/// returns value evaluated at the row that is the first row of the window frame
103+
FirstValue,
104+
/// returns value evaluated at the row that is the last row of the window frame
105+
LastValue,
106+
/// returns value evaluated at the row that is the nth row of the window frame (counting from 1); null if no such row
107+
NthValue,
108+
}
109+
110+
impl FromStr for BuiltInWindowFunction {
111+
type Err = DataFusionError;
112+
fn from_str(name: &str) -> Result<BuiltInWindowFunction> {
113+
Ok(match name.to_uppercase().as_str() {
114+
"ROW_NUMBER" => BuiltInWindowFunction::RowNumber,
115+
"RANK" => BuiltInWindowFunction::Rank,
116+
"DENSE_RANK" => BuiltInWindowFunction::DenseRank,
117+
"PERCENT_RANK" => BuiltInWindowFunction::PercentRank,
118+
"CUME_DIST" => BuiltInWindowFunction::CumeDist,
119+
"NTILE" => BuiltInWindowFunction::Ntile,
120+
"LAG" => BuiltInWindowFunction::Lag,
121+
"LEAD" => BuiltInWindowFunction::Lead,
122+
"FIRST_VALUE" => BuiltInWindowFunction::FirstValue,
123+
"LAST_VALUE" => BuiltInWindowFunction::LastValue,
124+
"NTH_VALUE" => BuiltInWindowFunction::NthValue,
125+
_ => {
126+
return Err(DataFusionError::Plan(format!(
127+
"There is no built-in window function named {}",
128+
name
129+
)))
130+
}
131+
})
132+
}
133+
}
134+
135+
#[cfg(test)]
136+
mod tests {
137+
use super::*;
138+
139+
#[test]
140+
fn test_window_function_case_insensitive() -> Result<()> {
141+
let names = vec![
142+
"row_number",
143+
"rank",
144+
"dense_rank",
145+
"percent_rank",
146+
"cume_dist",
147+
"ntile",
148+
"lag",
149+
"lead",
150+
"first_value",
151+
"last_value",
152+
"nth_value",
153+
"min",
154+
"max",
155+
"count",
156+
"avg",
157+
"sum",
158+
];
159+
for name in names {
160+
let fun = WindowFunction::from_str(name)?;
161+
let fun2 = WindowFunction::from_str(name.to_uppercase().as_str())?;
162+
assert_eq!(fun, fun2);
163+
assert_eq!(fun.to_string(), name.to_uppercase());
164+
}
165+
Ok(())
166+
}
167+
168+
#[test]
169+
fn test_window_function_from_str() -> Result<()> {
170+
assert_eq!(
171+
WindowFunction::from_str("max")?,
172+
WindowFunction::AggregateFunction(AggregateFunction::Max)
173+
);
174+
assert_eq!(
175+
WindowFunction::from_str("min")?,
176+
WindowFunction::AggregateFunction(AggregateFunction::Min)
177+
);
178+
assert_eq!(
179+
WindowFunction::from_str("avg")?,
180+
WindowFunction::AggregateFunction(AggregateFunction::Avg)
181+
);
182+
assert_eq!(
183+
WindowFunction::from_str("cume_dist")?,
184+
WindowFunction::BuiltInWindowFunction(BuiltInWindowFunction::CumeDist)
185+
);
186+
assert_eq!(
187+
WindowFunction::from_str("first_value")?,
188+
WindowFunction::BuiltInWindowFunction(BuiltInWindowFunction::FirstValue)
189+
);
190+
assert_eq!(
191+
WindowFunction::from_str("LAST_value")?,
192+
WindowFunction::BuiltInWindowFunction(BuiltInWindowFunction::LastValue)
193+
);
194+
assert_eq!(
195+
WindowFunction::from_str("LAG")?,
196+
WindowFunction::BuiltInWindowFunction(BuiltInWindowFunction::Lag)
197+
);
198+
assert_eq!(
199+
WindowFunction::from_str("LEAD")?,
200+
WindowFunction::BuiltInWindowFunction(BuiltInWindowFunction::Lead)
201+
);
202+
Ok(())
203+
}
204+
}

0 commit comments

Comments
 (0)