Skip to content

Commit 6ede97b

Browse files
committed
split datafusion-common module
1 parent 4f4153b commit 6ede97b

File tree

7 files changed

+287
-182
lines changed

7 files changed

+287
-182
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
[workspace]
1919
members = [
2020
"datafusion",
21+
"datafusion-common",
2122
"datafusion-cli",
2223
"datafusion-examples",
2324
"benchmarks",

datafusion-common/Cargo.toml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "datafusion-common"
20+
description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
21+
version = "6.0.0"
22+
homepage = "https://github.com/apache/arrow-datafusion"
23+
repository = "https://github.com/apache/arrow-datafusion"
24+
readme = "../README.md"
25+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
26+
license = "Apache-2.0"
27+
keywords = [ "arrow", "query", "sql" ]
28+
publish = false
29+
edition = "2021"
30+
rust-version = "1.58"
31+
32+
[lib]
33+
name = "datafusion_common"
34+
path = "src/lib.rs"
35+
36+
[features]
37+
avro = ["avro-rs"]
38+
39+
[dependencies]
40+
arrow = { version = "8.0.0", features = ["prettyprint"] }
41+
parquet = { version = "8.0.0", features = ["arrow"] }
42+
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
43+
sqlparser = "0.13"

datafusion-common/README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
<!---
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# DataFusion Common
21+
22+
This is an internal crate containing the most fundamental types of DataFusion.

datafusion-common/src/error.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! DataFusion error types
19+
20+
use std::error;
21+
use std::fmt::{Display, Formatter};
22+
use std::io;
23+
use std::result;
24+
25+
use arrow::error::ArrowError;
26+
#[cfg(feature = "avro")]
27+
use avro_rs::Error as AvroError;
28+
use parquet::errors::ParquetError;
29+
use sqlparser::parser::ParserError;
30+
31+
/// Result type for operations that could result in a [`DataFusionError`]
32+
pub type Result<T> = result::Result<T, DataFusionError>;
33+
34+
/// Error type for generic operations that could result in DataFusionError::External
35+
pub type GenericError = Box<dyn error::Error + Send + Sync>;
36+
37+
/// DataFusion error
38+
#[derive(Debug)]
39+
pub enum DataFusionError {
40+
/// Error returned by arrow.
41+
ArrowError(ArrowError),
42+
/// Wraps an error from the Parquet crate
43+
ParquetError(ParquetError),
44+
/// Wraps an error from the Avro crate
45+
#[cfg(feature = "avro")]
46+
AvroError(AvroError),
47+
/// Error associated with I/O operations and associated traits.
48+
IoError(io::Error),
49+
/// Error returned when SQL is syntactically incorrect.
50+
SQL(ParserError),
51+
/// Error returned on a branch that we know is possible
52+
/// but for which we still have no implementation.
53+
/// Often, these errors are tracked in our issue tracker.
54+
NotImplemented(String),
55+
/// Error returned as a consequence of an error in DataFusion.
56+
/// This error should not happen in normal usage of DataFusion.
57+
// DataFusion has internal invariants that we are unable to ask the compiler to check for us.
58+
// This error is raised when one of those invariants is not verified during execution.
59+
Internal(String),
60+
/// This error happens whenever a plan is not valid. Examples include
61+
/// impossible casts, schema inference not possible and non-unique column names.
62+
Plan(String),
63+
/// Error returned during execution of the query.
64+
/// Examples include files not found, errors in parsing certain types.
65+
Execution(String),
66+
/// This error is thrown when a consumer cannot acquire memory from the Memory Manager;
67+
/// in that case we can just cancel the execution of the partition.
68+
ResourcesExhausted(String),
69+
/// Errors originating from outside DataFusion's core codebase.
70+
/// For example, a custom S3Error from the crate datafusion-objectstore-s3
71+
External(GenericError),
72+
}
73+
74+
impl From<io::Error> for DataFusionError {
75+
fn from(e: io::Error) -> Self {
76+
DataFusionError::IoError(e)
77+
}
78+
}
79+
80+
impl From<ArrowError> for DataFusionError {
81+
fn from(e: ArrowError) -> Self {
82+
DataFusionError::ArrowError(e)
83+
}
84+
}
85+
86+
impl From<DataFusionError> for ArrowError {
87+
fn from(e: DataFusionError) -> Self {
88+
match e {
89+
DataFusionError::ArrowError(e) => e,
90+
DataFusionError::External(e) => ArrowError::ExternalError(e),
91+
other => ArrowError::ExternalError(Box::new(other)),
92+
}
93+
}
94+
}
95+
96+
impl From<ParquetError> for DataFusionError {
97+
fn from(e: ParquetError) -> Self {
98+
DataFusionError::ParquetError(e)
99+
}
100+
}
101+
102+
#[cfg(feature = "avro")]
103+
impl From<AvroError> for DataFusionError {
104+
fn from(e: AvroError) -> Self {
105+
DataFusionError::AvroError(e)
106+
}
107+
}
108+
109+
impl From<ParserError> for DataFusionError {
110+
fn from(e: ParserError) -> Self {
111+
DataFusionError::SQL(e)
112+
}
113+
}
114+
115+
impl From<GenericError> for DataFusionError {
116+
fn from(err: GenericError) -> Self {
117+
DataFusionError::External(err)
118+
}
119+
}
120+
121+
impl Display for DataFusionError {
122+
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
123+
match *self {
124+
DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc),
125+
DataFusionError::ParquetError(ref desc) => {
126+
write!(f, "Parquet error: {}", desc)
127+
}
128+
#[cfg(feature = "avro")]
129+
DataFusionError::AvroError(ref desc) => {
130+
write!(f, "Avro error: {}", desc)
131+
}
132+
DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc),
133+
DataFusionError::SQL(ref desc) => {
134+
write!(f, "SQL error: {:?}", desc)
135+
}
136+
DataFusionError::NotImplemented(ref desc) => {
137+
write!(f, "This feature is not implemented: {}", desc)
138+
}
139+
DataFusionError::Internal(ref desc) => {
140+
write!(f, "Internal error: {}. This was likely caused by a bug in DataFusion's \
141+
code and we would welcome that you file an bug report in our issue tracker", desc)
142+
}
143+
DataFusionError::Plan(ref desc) => {
144+
write!(f, "Error during planning: {}", desc)
145+
}
146+
DataFusionError::Execution(ref desc) => {
147+
write!(f, "Execution error: {}", desc)
148+
}
149+
DataFusionError::ResourcesExhausted(ref desc) => {
150+
write!(f, "Resources exhausted: {}", desc)
151+
}
152+
DataFusionError::External(ref desc) => {
153+
write!(f, "External error: {}", desc)
154+
}
155+
}
156+
}
157+
}
158+
159+
impl error::Error for DataFusionError {}
160+
161+
#[cfg(test)]
162+
mod test {
163+
use crate::error::DataFusionError;
164+
use arrow::error::ArrowError;
165+
166+
#[test]
167+
fn arrow_error_to_datafusion() {
168+
let res = return_arrow_error().unwrap_err();
169+
assert_eq!(
170+
res.to_string(),
171+
"External error: Error during planning: foo"
172+
);
173+
}
174+
175+
#[test]
176+
fn datafusion_error_to_arrow() {
177+
let res = return_datafusion_error().unwrap_err();
178+
assert_eq!(res.to_string(), "Arrow error: Schema error: bar");
179+
}
180+
181+
/// Model what happens when implementing SendableRecordBatchStream:
182+
/// DataFusion code needs to return an ArrowError
183+
#[allow(clippy::try_err)]
184+
fn return_arrow_error() -> arrow::error::Result<()> {
185+
// Expect the '?' to work
186+
let _foo = Err(DataFusionError::Plan("foo".to_string()))?;
187+
Ok(())
188+
}
189+
190+
/// Model what happens when using arrow kernels in DataFusion
191+
/// code: need to turn an ArrowError into a DataFusionError
192+
#[allow(clippy::try_err)]
193+
fn return_datafusion_error() -> crate::error::Result<()> {
194+
// Expect the '?' to work
195+
let _bar = Err(ArrowError::SchemaError("bar".to_string()))?;
196+
Ok(())
197+
}
198+
}

datafusion-common/src/lib.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
mod error;
19+
20+
pub use error::{DataFusionError, Result};

datafusion/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,10 @@ pyarrow = ["pyo3", "arrow/pyarrow"]
4747
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
4848
force_hash_collisions = []
4949
# Used to enable the avro format
50-
avro = ["avro-rs", "num-traits"]
50+
avro = ["avro-rs", "num-traits", "datafusion-common/avro"]
5151

5252
[dependencies]
53+
datafusion-common = { path = "../datafusion-common" }
5354
ahash = { version = "0.7", default-features = false }
5455
hashbrown = { version = "0.12", features = ["raw"] }
5556
arrow = { version = "8.0.0", features = ["prettyprint"] }

0 commit comments

Comments
 (0)