Skip to content

Commit 3540d00

Browse files
committed
pyarrow
1 parent 86dcb09 commit 3540d00

File tree

7 files changed

+446
-377
lines changed

7 files changed

+446
-377
lines changed

datafusion-common/src/pyarrow.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,6 @@ use pyo3::prelude::PyErr;
2525
use pyo3::types::PyList;
2626
use pyo3::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python};
2727

28-
impl From<DataFusionError> for PyErr {
29-
fn from(err: DataFusionError) -> PyErr {
30-
PyException::new_err(err.to_string())
31-
}
32-
}
33-
3428
impl PyArrowConvert for ScalarValue {
3529
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
3630
let py = value.py();

datafusion-expr/src/accumulator.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::array::ArrayRef;
19+
use datafusion_common::{Result, ScalarValue};
20+
use std::fmt::Debug;
21+
22+
/// An accumulator represents a stateful object that lives throughout the evaluation of multiple rows and
23+
/// generically accumulates values.
24+
///
25+
/// An accumulator knows how to:
26+
/// * update its state from inputs via `update_batch`
27+
/// * convert its internal state to a vector of scalar values
28+
/// * update its state from multiple accumulators' states via `merge_batch`
29+
/// * compute the final value from its internal state via `evaluate`
30+
pub trait Accumulator: Send + Sync + Debug {
31+
/// Returns the state of the accumulator at the end of the accumulation.
32+
// in the case of an average on which we track `sum` and `n`, this function should return a vector
33+
// of two values, sum and n.
34+
fn state(&self) -> Result<Vec<ScalarValue>>;
35+
36+
/// updates the accumulator's state from a vector of arrays.
37+
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()>;
38+
39+
/// updates the accumulator's state from a vector of states.
40+
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()>;
41+
42+
/// returns its value based on its current state.
43+
fn evaluate(&self) -> Result<ScalarValue>;
44+
}
Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Built-in functions
19+
20+
use crate::Volatility;
21+
use datafusion_common::{DataFusionError, Result};
22+
use std::fmt;
23+
use std::str::FromStr;
24+
25+
/// Enum of all built-in scalar functions
26+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
27+
pub enum BuiltinScalarFunction {
28+
// math functions
29+
/// abs
30+
Abs,
31+
/// acos
32+
Acos,
33+
/// asin
34+
Asin,
35+
/// atan
36+
Atan,
37+
/// ceil
38+
Ceil,
39+
/// cos
40+
Cos,
41+
/// Digest
42+
Digest,
43+
/// exp
44+
Exp,
45+
/// floor
46+
Floor,
47+
/// ln, Natural logarithm
48+
Ln,
49+
/// log, same as log10
50+
Log,
51+
/// log10
52+
Log10,
53+
/// log2
54+
Log2,
55+
/// round
56+
Round,
57+
/// signum
58+
Signum,
59+
/// sin
60+
Sin,
61+
/// sqrt
62+
Sqrt,
63+
/// tan
64+
Tan,
65+
/// trunc
66+
Trunc,
67+
68+
// string functions
69+
/// construct an array from columns
70+
Array,
71+
/// ascii
72+
Ascii,
73+
/// bit_length
74+
BitLength,
75+
/// btrim
76+
Btrim,
77+
/// character_length
78+
CharacterLength,
79+
/// chr
80+
Chr,
81+
/// concat
82+
Concat,
83+
/// concat_ws
84+
ConcatWithSeparator,
85+
/// date_part
86+
DatePart,
87+
/// date_trunc
88+
DateTrunc,
89+
/// initcap
90+
InitCap,
91+
/// left
92+
Left,
93+
/// lpad
94+
Lpad,
95+
/// lower
96+
Lower,
97+
/// ltrim
98+
Ltrim,
99+
/// md5
100+
MD5,
101+
/// nullif
102+
NullIf,
103+
/// octet_length
104+
OctetLength,
105+
/// random
106+
Random,
107+
/// regexp_replace
108+
RegexpReplace,
109+
/// repeat
110+
Repeat,
111+
/// replace
112+
Replace,
113+
/// reverse
114+
Reverse,
115+
/// right
116+
Right,
117+
/// rpad
118+
Rpad,
119+
/// rtrim
120+
Rtrim,
121+
/// sha224
122+
SHA224,
123+
/// sha256
124+
SHA256,
125+
/// sha384
126+
SHA384,
127+
/// Sha512
128+
SHA512,
129+
/// split_part
130+
SplitPart,
131+
/// starts_with
132+
StartsWith,
133+
/// strpos
134+
Strpos,
135+
/// substr
136+
Substr,
137+
/// to_hex
138+
ToHex,
139+
/// to_timestamp
140+
ToTimestamp,
141+
/// to_timestamp_millis
142+
ToTimestampMillis,
143+
/// to_timestamp_micros
144+
ToTimestampMicros,
145+
/// to_timestamp_seconds
146+
ToTimestampSeconds,
147+
///now
148+
Now,
149+
/// translate
150+
Translate,
151+
/// trim
152+
Trim,
153+
/// upper
154+
Upper,
155+
/// regexp_match
156+
RegexpMatch,
157+
}
158+
159+
impl BuiltinScalarFunction {
160+
/// an allowlist of functions to take zero arguments, so that they will get special treatment
161+
/// while executing.
162+
pub fn supports_zero_argument(&self) -> bool {
163+
matches!(
164+
self,
165+
BuiltinScalarFunction::Random | BuiltinScalarFunction::Now
166+
)
167+
}
168+
/// Returns the [Volatility] of the builtin function.
169+
pub fn volatility(&self) -> Volatility {
170+
match self {
171+
//Immutable scalar builtins
172+
BuiltinScalarFunction::Abs => Volatility::Immutable,
173+
BuiltinScalarFunction::Acos => Volatility::Immutable,
174+
BuiltinScalarFunction::Asin => Volatility::Immutable,
175+
BuiltinScalarFunction::Atan => Volatility::Immutable,
176+
BuiltinScalarFunction::Ceil => Volatility::Immutable,
177+
BuiltinScalarFunction::Cos => Volatility::Immutable,
178+
BuiltinScalarFunction::Exp => Volatility::Immutable,
179+
BuiltinScalarFunction::Floor => Volatility::Immutable,
180+
BuiltinScalarFunction::Ln => Volatility::Immutable,
181+
BuiltinScalarFunction::Log => Volatility::Immutable,
182+
BuiltinScalarFunction::Log10 => Volatility::Immutable,
183+
BuiltinScalarFunction::Log2 => Volatility::Immutable,
184+
BuiltinScalarFunction::Round => Volatility::Immutable,
185+
BuiltinScalarFunction::Signum => Volatility::Immutable,
186+
BuiltinScalarFunction::Sin => Volatility::Immutable,
187+
BuiltinScalarFunction::Sqrt => Volatility::Immutable,
188+
BuiltinScalarFunction::Tan => Volatility::Immutable,
189+
BuiltinScalarFunction::Trunc => Volatility::Immutable,
190+
BuiltinScalarFunction::Array => Volatility::Immutable,
191+
BuiltinScalarFunction::Ascii => Volatility::Immutable,
192+
BuiltinScalarFunction::BitLength => Volatility::Immutable,
193+
BuiltinScalarFunction::Btrim => Volatility::Immutable,
194+
BuiltinScalarFunction::CharacterLength => Volatility::Immutable,
195+
BuiltinScalarFunction::Chr => Volatility::Immutable,
196+
BuiltinScalarFunction::Concat => Volatility::Immutable,
197+
BuiltinScalarFunction::ConcatWithSeparator => Volatility::Immutable,
198+
BuiltinScalarFunction::DatePart => Volatility::Immutable,
199+
BuiltinScalarFunction::DateTrunc => Volatility::Immutable,
200+
BuiltinScalarFunction::InitCap => Volatility::Immutable,
201+
BuiltinScalarFunction::Left => Volatility::Immutable,
202+
BuiltinScalarFunction::Lpad => Volatility::Immutable,
203+
BuiltinScalarFunction::Lower => Volatility::Immutable,
204+
BuiltinScalarFunction::Ltrim => Volatility::Immutable,
205+
BuiltinScalarFunction::MD5 => Volatility::Immutable,
206+
BuiltinScalarFunction::NullIf => Volatility::Immutable,
207+
BuiltinScalarFunction::OctetLength => Volatility::Immutable,
208+
BuiltinScalarFunction::RegexpReplace => Volatility::Immutable,
209+
BuiltinScalarFunction::Repeat => Volatility::Immutable,
210+
BuiltinScalarFunction::Replace => Volatility::Immutable,
211+
BuiltinScalarFunction::Reverse => Volatility::Immutable,
212+
BuiltinScalarFunction::Right => Volatility::Immutable,
213+
BuiltinScalarFunction::Rpad => Volatility::Immutable,
214+
BuiltinScalarFunction::Rtrim => Volatility::Immutable,
215+
BuiltinScalarFunction::SHA224 => Volatility::Immutable,
216+
BuiltinScalarFunction::SHA256 => Volatility::Immutable,
217+
BuiltinScalarFunction::SHA384 => Volatility::Immutable,
218+
BuiltinScalarFunction::SHA512 => Volatility::Immutable,
219+
BuiltinScalarFunction::Digest => Volatility::Immutable,
220+
BuiltinScalarFunction::SplitPart => Volatility::Immutable,
221+
BuiltinScalarFunction::StartsWith => Volatility::Immutable,
222+
BuiltinScalarFunction::Strpos => Volatility::Immutable,
223+
BuiltinScalarFunction::Substr => Volatility::Immutable,
224+
BuiltinScalarFunction::ToHex => Volatility::Immutable,
225+
BuiltinScalarFunction::ToTimestamp => Volatility::Immutable,
226+
BuiltinScalarFunction::ToTimestampMillis => Volatility::Immutable,
227+
BuiltinScalarFunction::ToTimestampMicros => Volatility::Immutable,
228+
BuiltinScalarFunction::ToTimestampSeconds => Volatility::Immutable,
229+
BuiltinScalarFunction::Translate => Volatility::Immutable,
230+
BuiltinScalarFunction::Trim => Volatility::Immutable,
231+
BuiltinScalarFunction::Upper => Volatility::Immutable,
232+
BuiltinScalarFunction::RegexpMatch => Volatility::Immutable,
233+
234+
//Stable builtin functions
235+
BuiltinScalarFunction::Now => Volatility::Stable,
236+
237+
//Volatile builtin functions
238+
BuiltinScalarFunction::Random => Volatility::Volatile,
239+
}
240+
}
241+
}
242+
243+
impl fmt::Display for BuiltinScalarFunction {
244+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
245+
// lowercase of the debug.
246+
write!(f, "{}", format!("{:?}", self).to_lowercase())
247+
}
248+
}
249+
250+
impl FromStr for BuiltinScalarFunction {
251+
type Err = DataFusionError;
252+
fn from_str(name: &str) -> Result<BuiltinScalarFunction> {
253+
Ok(match name {
254+
// math functions
255+
"abs" => BuiltinScalarFunction::Abs,
256+
"acos" => BuiltinScalarFunction::Acos,
257+
"asin" => BuiltinScalarFunction::Asin,
258+
"atan" => BuiltinScalarFunction::Atan,
259+
"ceil" => BuiltinScalarFunction::Ceil,
260+
"cos" => BuiltinScalarFunction::Cos,
261+
"exp" => BuiltinScalarFunction::Exp,
262+
"floor" => BuiltinScalarFunction::Floor,
263+
"ln" => BuiltinScalarFunction::Ln,
264+
"log" => BuiltinScalarFunction::Log,
265+
"log10" => BuiltinScalarFunction::Log10,
266+
"log2" => BuiltinScalarFunction::Log2,
267+
"round" => BuiltinScalarFunction::Round,
268+
"signum" => BuiltinScalarFunction::Signum,
269+
"sin" => BuiltinScalarFunction::Sin,
270+
"sqrt" => BuiltinScalarFunction::Sqrt,
271+
"tan" => BuiltinScalarFunction::Tan,
272+
"trunc" => BuiltinScalarFunction::Trunc,
273+
274+
// string functions
275+
"array" => BuiltinScalarFunction::Array,
276+
"ascii" => BuiltinScalarFunction::Ascii,
277+
"bit_length" => BuiltinScalarFunction::BitLength,
278+
"btrim" => BuiltinScalarFunction::Btrim,
279+
"char_length" => BuiltinScalarFunction::CharacterLength,
280+
"character_length" => BuiltinScalarFunction::CharacterLength,
281+
"concat" => BuiltinScalarFunction::Concat,
282+
"concat_ws" => BuiltinScalarFunction::ConcatWithSeparator,
283+
"chr" => BuiltinScalarFunction::Chr,
284+
"date_part" | "datepart" => BuiltinScalarFunction::DatePart,
285+
"date_trunc" | "datetrunc" => BuiltinScalarFunction::DateTrunc,
286+
"initcap" => BuiltinScalarFunction::InitCap,
287+
"left" => BuiltinScalarFunction::Left,
288+
"length" => BuiltinScalarFunction::CharacterLength,
289+
"lower" => BuiltinScalarFunction::Lower,
290+
"lpad" => BuiltinScalarFunction::Lpad,
291+
"ltrim" => BuiltinScalarFunction::Ltrim,
292+
"md5" => BuiltinScalarFunction::MD5,
293+
"nullif" => BuiltinScalarFunction::NullIf,
294+
"octet_length" => BuiltinScalarFunction::OctetLength,
295+
"random" => BuiltinScalarFunction::Random,
296+
"regexp_replace" => BuiltinScalarFunction::RegexpReplace,
297+
"repeat" => BuiltinScalarFunction::Repeat,
298+
"replace" => BuiltinScalarFunction::Replace,
299+
"reverse" => BuiltinScalarFunction::Reverse,
300+
"right" => BuiltinScalarFunction::Right,
301+
"rpad" => BuiltinScalarFunction::Rpad,
302+
"rtrim" => BuiltinScalarFunction::Rtrim,
303+
"sha224" => BuiltinScalarFunction::SHA224,
304+
"sha256" => BuiltinScalarFunction::SHA256,
305+
"sha384" => BuiltinScalarFunction::SHA384,
306+
"sha512" => BuiltinScalarFunction::SHA512,
307+
"digest" => BuiltinScalarFunction::Digest,
308+
"split_part" => BuiltinScalarFunction::SplitPart,
309+
"starts_with" => BuiltinScalarFunction::StartsWith,
310+
"strpos" => BuiltinScalarFunction::Strpos,
311+
"substr" => BuiltinScalarFunction::Substr,
312+
"to_hex" => BuiltinScalarFunction::ToHex,
313+
"to_timestamp" => BuiltinScalarFunction::ToTimestamp,
314+
"to_timestamp_millis" => BuiltinScalarFunction::ToTimestampMillis,
315+
"to_timestamp_micros" => BuiltinScalarFunction::ToTimestampMicros,
316+
"to_timestamp_seconds" => BuiltinScalarFunction::ToTimestampSeconds,
317+
"now" => BuiltinScalarFunction::Now,
318+
"translate" => BuiltinScalarFunction::Translate,
319+
"trim" => BuiltinScalarFunction::Trim,
320+
"upper" => BuiltinScalarFunction::Upper,
321+
"regexp_match" => BuiltinScalarFunction::RegexpMatch,
322+
_ => {
323+
return Err(DataFusionError::Plan(format!(
324+
"There is no built-in function named {}",
325+
name
326+
)))
327+
}
328+
})
329+
}
330+
}

0 commit comments

Comments
 (0)