Skip to content

Commit fe46a1e

Browse files
jimexist and andygrove authored
[split/1] split datafusion-common module (#1751)
* split datafusion-common module * pyarrow * Update datafusion-common/README.md Co-authored-by: Andy Grove <agrove@apache.org> * Update datafusion/Cargo.toml * include publishing Co-authored-by: Andy Grove <agrove@apache.org>
1 parent 40c29e5 commit fe46a1e

File tree

8 files changed

+302
-191
lines changed

8 files changed

+302
-191
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
[workspace]
1919
members = [
2020
"datafusion",
21+
"datafusion-common",
2122
"datafusion-cli",
2223
"datafusion-examples",
2324
"benchmarks",

datafusion-common/Cargo.toml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-common"
20+
description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
21+
version = "6.0.0"
22+
homepage = "https://github.com/apache/arrow-datafusion"
23+
repository = "https://github.com/apache/arrow-datafusion"
24+
readme = "README.md"
25+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
26+
license = "Apache-2.0"
27+
keywords = [ "arrow", "query", "sql" ]
28+
edition = "2021"
29+
rust-version = "1.58"
30+
31+
[lib]
32+
name = "datafusion_common"
33+
path = "src/lib.rs"
34+
35+
[features]
36+
avro = ["avro-rs"]
37+
pyarrow = ["pyo3"]
38+
39+
[dependencies]
40+
arrow = { version = "8.0.0", features = ["prettyprint"] }
41+
parquet = { version = "8.0.0", features = ["arrow"] }
42+
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
43+
pyo3 = { version = "0.15", optional = true }
44+
sqlparser = "0.13"

datafusion-common/README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<!---
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# DataFusion Common
21+
22+
This is an internal module for the most fundamental types of [DataFusion][df].
23+
24+
[df]: https://crates.io/crates/datafusion

datafusion-common/src/error.rs

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! DataFusion error types
19+
20+
use std::error;
21+
use std::fmt::{Display, Formatter};
22+
use std::io;
23+
use std::result;
24+
25+
use arrow::error::ArrowError;
26+
#[cfg(feature = "avro")]
27+
use avro_rs::Error as AvroError;
28+
use parquet::errors::ParquetError;
29+
#[cfg(feature = "pyarrow")]
30+
use pyo3::exceptions::PyException;
31+
#[cfg(feature = "pyarrow")]
32+
use pyo3::prelude::PyErr;
33+
use sqlparser::parser::ParserError;
34+
35+
/// Result type for operations that could result in a [DataFusionError]
36+
pub type Result<T> = result::Result<T, DataFusionError>;
37+
38+
/// Error type for generic operations that could result in DataFusionError::External
39+
pub type GenericError = Box<dyn error::Error + Send + Sync>;
40+
41+
/// DataFusion error
42+
#[derive(Debug)]
43+
pub enum DataFusionError {
44+
/// Error returned by arrow.
45+
ArrowError(ArrowError),
46+
/// Wraps an error from the Parquet crate
47+
ParquetError(ParquetError),
48+
/// Wraps an error from the Avro crate
49+
#[cfg(feature = "avro")]
50+
AvroError(AvroError),
51+
/// Error associated with I/O operations and associated traits.
52+
IoError(io::Error),
53+
/// Error returned when SQL is syntactically incorrect.
54+
SQL(ParserError),
55+
/// Error returned on a branch that we know is possible
56+
/// but for which we still have no implementation.
57+
/// Often, these errors are tracked in our issue tracker.
58+
NotImplemented(String),
59+
/// Error returned as a consequence of an error in DataFusion.
60+
/// This error should not happen in normal usage of DataFusion.
61+
// DataFusion has internal invariants that we are unable to ask the compiler to check for us.
62+
// This error is raised when one of those invariants is not verified during execution.
63+
Internal(String),
64+
/// This error happens whenever a plan is not valid. Examples include
65+
/// impossible casts, schema inference not possible and non-unique column names.
66+
Plan(String),
67+
/// Error returned during execution of the query.
68+
/// Examples include files not found, errors in parsing certain types.
69+
Execution(String),
70+
/// This error is thrown when a consumer cannot acquire memory from the Memory Manager;
71+
/// in that case we can just cancel the execution of the partition.
72+
ResourcesExhausted(String),
73+
/// Errors originating from outside DataFusion's core codebase.
74+
/// For example, a custom S3Error from the crate datafusion-objectstore-s3
75+
External(GenericError),
76+
}
77+
78+
impl From<io::Error> for DataFusionError {
79+
fn from(e: io::Error) -> Self {
80+
DataFusionError::IoError(e)
81+
}
82+
}
83+
84+
impl From<ArrowError> for DataFusionError {
85+
fn from(e: ArrowError) -> Self {
86+
DataFusionError::ArrowError(e)
87+
}
88+
}
89+
90+
#[cfg(feature = "pyarrow")]
91+
impl From<DataFusionError> for PyErr {
92+
fn from(err: DataFusionError) -> PyErr {
93+
PyException::new_err(err.to_string())
94+
}
95+
}
96+
97+
impl From<DataFusionError> for ArrowError {
98+
fn from(e: DataFusionError) -> Self {
99+
match e {
100+
DataFusionError::ArrowError(e) => e,
101+
DataFusionError::External(e) => ArrowError::ExternalError(e),
102+
other => ArrowError::ExternalError(Box::new(other)),
103+
}
104+
}
105+
}
106+
107+
impl From<ParquetError> for DataFusionError {
108+
fn from(e: ParquetError) -> Self {
109+
DataFusionError::ParquetError(e)
110+
}
111+
}
112+
113+
#[cfg(feature = "avro")]
114+
impl From<AvroError> for DataFusionError {
115+
fn from(e: AvroError) -> Self {
116+
DataFusionError::AvroError(e)
117+
}
118+
}
119+
120+
impl From<ParserError> for DataFusionError {
121+
fn from(e: ParserError) -> Self {
122+
DataFusionError::SQL(e)
123+
}
124+
}
125+
126+
impl From<GenericError> for DataFusionError {
127+
fn from(err: GenericError) -> Self {
128+
DataFusionError::External(err)
129+
}
130+
}
131+
132+
impl Display for DataFusionError {
133+
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
134+
match *self {
135+
DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc),
136+
DataFusionError::ParquetError(ref desc) => {
137+
write!(f, "Parquet error: {}", desc)
138+
}
139+
#[cfg(feature = "avro")]
140+
DataFusionError::AvroError(ref desc) => {
141+
write!(f, "Avro error: {}", desc)
142+
}
143+
DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc),
144+
DataFusionError::SQL(ref desc) => {
145+
write!(f, "SQL error: {:?}", desc)
146+
}
147+
DataFusionError::NotImplemented(ref desc) => {
148+
write!(f, "This feature is not implemented: {}", desc)
149+
}
150+
DataFusionError::Internal(ref desc) => {
151+
write!(f, "Internal error: {}. This was likely caused by a bug in DataFusion's \
152+
code and we would welcome that you file an bug report in our issue tracker", desc)
153+
}
154+
DataFusionError::Plan(ref desc) => {
155+
write!(f, "Error during planning: {}", desc)
156+
}
157+
DataFusionError::Execution(ref desc) => {
158+
write!(f, "Execution error: {}", desc)
159+
}
160+
DataFusionError::ResourcesExhausted(ref desc) => {
161+
write!(f, "Resources exhausted: {}", desc)
162+
}
163+
DataFusionError::External(ref desc) => {
164+
write!(f, "External error: {}", desc)
165+
}
166+
}
167+
}
168+
}
169+
170+
impl error::Error for DataFusionError {}
171+
172+
#[cfg(test)]
173+
mod test {
174+
use crate::error::DataFusionError;
175+
use arrow::error::ArrowError;
176+
177+
#[test]
178+
fn arrow_error_to_datafusion() {
179+
let res = return_arrow_error().unwrap_err();
180+
assert_eq!(
181+
res.to_string(),
182+
"External error: Error during planning: foo"
183+
);
184+
}
185+
186+
#[test]
187+
fn datafusion_error_to_arrow() {
188+
let res = return_datafusion_error().unwrap_err();
189+
assert_eq!(res.to_string(), "Arrow error: Schema error: bar");
190+
}
191+
192+
/// Model what happens when implementing SendableRecordBatchStream:
193+
/// DataFusion code needs to return an ArrowError
194+
#[allow(clippy::try_err)]
195+
fn return_arrow_error() -> arrow::error::Result<()> {
196+
// Expect the '?' to work
197+
let _foo = Err(DataFusionError::Plan("foo".to_string()))?;
198+
Ok(())
199+
}
200+
201+
/// Model what happens when using arrow kernels in DataFusion
202+
/// code: need to turn an ArrowError into a DataFusionError
203+
#[allow(clippy::try_err)]
204+
fn return_datafusion_error() -> crate::error::Result<()> {
205+
// Expect the '?' to work
206+
let _bar = Err(ArrowError::SchemaError("bar".to_string()))?;
207+
Ok(())
208+
}
209+
}

datafusion-common/src/lib.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
mod error;
19+
20+
pub use error::{DataFusionError, Result};

datafusion/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,14 @@ simd = ["arrow/simd"]
4343
crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
4444
regex_expressions = ["regex"]
4545
unicode_expressions = ["unicode-segmentation"]
46-
pyarrow = ["pyo3", "arrow/pyarrow"]
46+
pyarrow = ["pyo3", "arrow/pyarrow", "datafusion-common/pyarrow"]
4747
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
4848
force_hash_collisions = []
4949
# Used to enable the avro format
50-
avro = ["avro-rs", "num-traits"]
50+
avro = ["avro-rs", "num-traits", "datafusion-common/avro"]
5151

5252
[dependencies]
53+
datafusion-common = { path = "../datafusion-common", version = "6.0.0" }
5354
ahash = { version = "0.7", default-features = false }
5455
hashbrown = { version = "0.12", features = ["raw"] }
5556
arrow = { version = "8.0.0", features = ["prettyprint"] }

0 commit comments

Comments
 (0)