Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ half = { version = "2.1", default-features = false }
hashbrown = { version = "0.14", features = ["raw"] }
indexmap = "2.0.0"
itertools = "0.11"
lazy_static = { version = "^1.4.0" }
log = "^0.4"
num-traits = { version = "0.2", optional = true }
num_cpus = "1.13.0"
Expand Down
1 change: 0 additions & 1 deletion datafusion/expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ path = "src/lib.rs"
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
arrow = { workspace = true }
datafusion-common = { path = "../common", version = "30.0.0" }
lazy_static = { version = "^1.4.0" }
sqlparser = { workspace = true }
strum = { version = "0.25.0", features = ["derive"] }
strum_macros = "0.25.0"
Expand Down
42 changes: 24 additions & 18 deletions datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,10 @@ use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
use std::collections::HashMap;
use std::fmt;
use std::str::FromStr;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};
use strum::IntoEnumIterator;
use strum_macros::EnumIter;

use lazy_static::lazy_static;

/// Enum of all built-in scalar functions
// Contributor's guide for adding new scalar functions
// https://arrow.apache.org/datafusion/contributor-guide/index.html#how-to-add-a-new-scalar-function
Expand Down Expand Up @@ -282,26 +280,34 @@ pub enum BuiltinScalarFunction {
ArrowTypeof,
}

lazy_static! {
/// Maps the sql function name to `BuiltinScalarFunction`
static ref NAME_TO_FUNCTION: HashMap<&'static str, BuiltinScalarFunction> = {
let mut map: HashMap<&'static str, BuiltinScalarFunction> = HashMap::new();
/// Maps the sql function name to `BuiltinScalarFunction`
fn name_to_function() -> &'static HashMap<&'static str, BuiltinScalarFunction> {
static NAME_TO_FUNCTION_LOCK: OnceLock<HashMap<&'static str, BuiltinScalarFunction>> =
OnceLock::new();
NAME_TO_FUNCTION_LOCK.get_or_init(|| {
let mut map = HashMap::new();
BuiltinScalarFunction::iter().for_each(|func| {
let a = aliases(&func);
a.iter().for_each(|a| {map.insert(a, func);});
a.iter().for_each(|&a| {
map.insert(a, func);
});
});
map
};
})
}

/// Maps `BuiltinScalarFunction` --> canonical sql function
/// First alias in the array is used to display function names
static ref FUNCTION_TO_NAME: HashMap<BuiltinScalarFunction, &'static str> = {
let mut map: HashMap<BuiltinScalarFunction, &'static str> = HashMap::new();
/// Maps each `BuiltinScalarFunction` to its canonical SQL function name.
/// The first alias in the array is used as the display name (`NO_ALIAS` if a function has no aliases).
fn function_to_name() -> &'static HashMap<BuiltinScalarFunction, &'static str> {
static FUNCTION_TO_NAME_LOCK: OnceLock<HashMap<BuiltinScalarFunction, &'static str>> =
OnceLock::new();
FUNCTION_TO_NAME_LOCK.get_or_init(|| {
let mut map = HashMap::new();
BuiltinScalarFunction::iter().for_each(|func| {
map.insert(func, aliases(&func).first().unwrap_or(&"NO_ALIAS"));
map.insert(func, *aliases(&func).first().unwrap_or(&"NO_ALIAS"));
});
map
};
})
}

impl BuiltinScalarFunction {
Expand Down Expand Up @@ -1379,14 +1385,14 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
impl fmt::Display for BuiltinScalarFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// `.unwrap()` is safe here: the map is populated by iterating over every `BuiltinScalarFunction` variant, so each variant has an entry
write!(f, "{}", FUNCTION_TO_NAME.get(self).unwrap())
write!(f, "{}", function_to_name().get(self).unwrap())
}
}

impl FromStr for BuiltinScalarFunction {
type Err = DataFusionError;
fn from_str(name: &str) -> Result<BuiltinScalarFunction> {
if let Some(func) = NAME_TO_FUNCTION.get(name) {
if let Some(func) = name_to_function().get(name) {
Ok(*func)
} else {
plan_err!("There is no built-in function named {name}")
Expand Down Expand Up @@ -1453,7 +1459,7 @@ mod tests {
// and then back to a variant. The test asserts that the original variant and
// the reconstructed variant are the same.
fn test_display_and_from_str() {
for (_, func_original) in NAME_TO_FUNCTION.iter() {
for (_, func_original) in name_to_function().iter() {
let func_name = func_original.to_string();
let func_from_str = BuiltinScalarFunction::from_str(&func_name).unwrap();
assert_eq!(func_from_str, *func_original);
Expand Down
1 change: 0 additions & 1 deletion datafusion/physical-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ hashbrown = { version = "0.14", features = ["raw"] }
hex = { version = "0.4", optional = true }
indexmap = "2.0.0"
itertools = { version = "0.11", features = ["use_std"] }
lazy_static = { version = "^1.4.0" }
libc = "0.2.140"
log = "^0.4"
md-5 = { version = "^0.10.0", optional = true }
Expand Down
10 changes: 5 additions & 5 deletions datafusion/physical-expr/src/regex_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ use datafusion_common::{
};
use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation};
use hashbrown::HashMap;
use lazy_static::lazy_static;
use regex::Regex;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};

use crate::functions::{make_scalar_function, make_scalar_function_with_hints, Hint};

Expand Down Expand Up @@ -82,10 +81,11 @@ pub fn regexp_match<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
/// Replaces POSIX capture group references (like `\1`) with Rust `Regex` group references (like `${1}`).
/// Used by `regexp_replace`.
fn regex_replace_posix_groups(replacement: &str) -> String {
lazy_static! {
static ref CAPTURE_GROUPS_RE: Regex = Regex::new(r"(\\)(\d*)").unwrap();
fn capture_groups_re() -> &'static Regex {
static CAPTURE_GROUPS_RE_LOCK: OnceLock<Regex> = OnceLock::new();
CAPTURE_GROUPS_RE_LOCK.get_or_init(|| Regex::new(r"(\\)(\d*)").unwrap())
}
CAPTURE_GROUPS_RE
capture_groups_re()
.replace_all(replacement, "$${$2}")
.into_owned()
}
Expand Down
1 change: 0 additions & 1 deletion datafusion/sqllogictest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ datafusion = {path = "../core", version = "30.0.0"}
datafusion-common = {path = "../common", version = "30.0.0"}
half = "2.2.1"
itertools = "0.11"
lazy_static = {version = "^1.4.0"}
object_store = "0.7.0"
rust_decimal = {version = "1.27.0"}
log = "^0.4"
Expand Down
51 changes: 25 additions & 26 deletions datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBat
use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;
use datafusion_common::DFField;
use datafusion_common::DataFusionError;
use lazy_static::lazy_static;
use std::path::PathBuf;
use std::sync::OnceLock;

use crate::engines::output::DFColumnType;

Expand Down Expand Up @@ -126,7 +126,7 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
/// ```
fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
row.iter_mut().for_each(|s| {
let workspace_root: &str = WORKSPACE_ROOT.as_ref();
let workspace_root: &str = workspace_root().as_ref();
if s.contains(workspace_root) {
*s = s.replace(workspace_root, "WORKSPACE_ROOT");
}
Expand All @@ -135,33 +135,32 @@ fn normalize_paths(mut row: Vec<String>) -> Vec<String> {
}

/// return the location of the datafusion checkout
fn workspace_root() -> object_store::path::Path {
// e.g. /Software/arrow-datafusion/datafusion/core
let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
fn workspace_root() -> &'static object_store::path::Path {
static WORKSPACE_ROOT_LOCK: OnceLock<object_store::path::Path> = OnceLock::new();
WORKSPACE_ROOT_LOCK.get_or_init(|| {
// e.g. /Software/arrow-datafusion/datafusion/sqllogictest
let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));

// e.g. /Software/arrow-datafusion/datafusion
let workspace_root = dir
.parent()
.expect("Can not find parent of datafusion/core")
// e.g. /Software/arrow-datafusion
.parent()
.expect("parent of datafusion")
.to_string_lossy();
// e.g. /Software/arrow-datafusion/datafusion
let workspace_root = dir
.parent()
.expect("Can not find parent of datafusion/core")
// e.g. /Software/arrow-datafusion
.parent()
.expect("parent of datafusion")
.to_string_lossy();

let sanitized_workplace_root = if cfg!(windows) {
// Object store paths are delimited with `/`, e.g. `D:/a/arrow-datafusion/arrow-datafusion/testing/data/csv/aggregate_test_100.csv`.
// The default windows delimiter is `\`, so the workplace path is `D:\a\arrow-datafusion\arrow-datafusion`.
workspace_root.replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
} else {
workspace_root.to_string()
};

object_store::path::Path::parse(sanitized_workplace_root).unwrap()
}
let sanitized_workplace_root = if cfg!(windows) {
// Object store paths are delimited with `/`, e.g. `D:/a/arrow-datafusion/arrow-datafusion/testing/data/csv/aggregate_test_100.csv`.
// The default Windows separator is `\`, so the workspace root is `D:\a\arrow-datafusion\arrow-datafusion`.
workspace_root
.replace(std::path::MAIN_SEPARATOR, object_store::path::DELIMITER)
} else {
workspace_root.to_string()
};

// holds the root directory
lazy_static! {
static ref WORKSPACE_ROOT: object_store::path::Path = workspace_root();
object_store::path::Path::parse(sanitized_workplace_root).unwrap()
})
}

/// Convert a single batch to a `Vec<Vec<String>>` for comparison
Expand Down