Skip to content

Commit 4a766c5

Browse files
committed
s
1 parent f4d6c90 commit 4a766c5

File tree

6 files changed

+440
-366
lines changed

6 files changed

+440
-366
lines changed

datafusion-expr/src/expr.rs

Lines changed: 292 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@
1717

1818
//! Expressions
1919
20+
use crate::field_util::get_indexed_field;
2021
use crate::operator::Operator;
22+
use crate::window_frame;
23+
use crate::window_function;
2124
use arrow::{compute::can_cast_types, datatypes::DataType};
2225
use datafusion_common::{
23-
DFField, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue,
26+
Column, DFField, DFSchema, DataFusionError, ExprSchema, Result, ScalarValue,
2427
};
2528
use std::fmt;
2629
use std::hash::{BuildHasher, Hash, Hasher};
@@ -183,7 +186,7 @@ pub enum Expr {
183186
/// Represents the call of an aggregate built-in function with arguments.
184187
AggregateFunction {
185188
/// Name of the function
186-
fun: aggregates::AggregateFunction,
189+
fun: aggregate::AggregateFunction,
187190
/// List of expressions to feed to the functions as arguments
188191
args: Vec<Expr>,
189192
/// Whether this is a DISTINCT aggregation or not
@@ -192,15 +195,15 @@ pub enum Expr {
192195
/// Represents the call of a window function with arguments.
193196
WindowFunction {
194197
/// Name of the function
195-
fun: window_functions::WindowFunction,
198+
fun: window_function::WindowFunction,
196199
/// List of expressions to feed to the functions as arguments
197200
args: Vec<Expr>,
198201
/// List of partition by expressions
199202
partition_by: Vec<Expr>,
200203
/// List of order by expressions
201204
order_by: Vec<Expr>,
202205
/// Window frame
203-
window_frame: Option<window_frames::WindowFrame>,
206+
window_frame: Option<window_frame::WindowFrame>,
204207
},
205208
/// aggregate function
206209
AggregateUDF {
@@ -981,3 +984,288 @@ pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr {
981984
right: Box::new(r),
982985
}
983986
}
987+
988+
impl fmt::Debug for Expr {
989+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
990+
match self {
991+
Expr::Alias(expr, alias) => write!(f, "{:?} AS {}", expr, alias),
992+
Expr::Column(c) => write!(f, "{}", c),
993+
Expr::ScalarVariable(var_names) => write!(f, "{}", var_names.join(".")),
994+
Expr::Literal(v) => write!(f, "{:?}", v),
995+
Expr::Case {
996+
expr,
997+
when_then_expr,
998+
else_expr,
999+
..
1000+
} => {
1001+
write!(f, "CASE ")?;
1002+
if let Some(e) = expr {
1003+
write!(f, "{:?} ", e)?;
1004+
}
1005+
for (w, t) in when_then_expr {
1006+
write!(f, "WHEN {:?} THEN {:?} ", w, t)?;
1007+
}
1008+
if let Some(e) = else_expr {
1009+
write!(f, "ELSE {:?} ", e)?;
1010+
}
1011+
write!(f, "END")
1012+
}
1013+
Expr::Cast { expr, data_type } => {
1014+
write!(f, "CAST({:?} AS {:?})", expr, data_type)
1015+
}
1016+
Expr::TryCast { expr, data_type } => {
1017+
write!(f, "TRY_CAST({:?} AS {:?})", expr, data_type)
1018+
}
1019+
Expr::Not(expr) => write!(f, "NOT {:?}", expr),
1020+
Expr::Negative(expr) => write!(f, "(- {:?})", expr),
1021+
Expr::IsNull(expr) => write!(f, "{:?} IS NULL", expr),
1022+
Expr::IsNotNull(expr) => write!(f, "{:?} IS NOT NULL", expr),
1023+
Expr::BinaryExpr { left, op, right } => {
1024+
write!(f, "{:?} {} {:?}", left, op, right)
1025+
}
1026+
Expr::Sort {
1027+
expr,
1028+
asc,
1029+
nulls_first,
1030+
} => {
1031+
if *asc {
1032+
write!(f, "{:?} ASC", expr)?;
1033+
} else {
1034+
write!(f, "{:?} DESC", expr)?;
1035+
}
1036+
if *nulls_first {
1037+
write!(f, " NULLS FIRST")
1038+
} else {
1039+
write!(f, " NULLS LAST")
1040+
}
1041+
}
1042+
Expr::ScalarFunction { fun, args, .. } => {
1043+
fmt_function(f, &fun.to_string(), false, args, false)
1044+
}
1045+
Expr::ScalarUDF { fun, ref args, .. } => {
1046+
fmt_function(f, &fun.name, false, args, false)
1047+
}
1048+
Expr::WindowFunction {
1049+
fun,
1050+
args,
1051+
partition_by,
1052+
order_by,
1053+
window_frame,
1054+
} => {
1055+
fmt_function(f, &fun.to_string(), false, args, false)?;
1056+
if !partition_by.is_empty() {
1057+
write!(f, " PARTITION BY {:?}", partition_by)?;
1058+
}
1059+
if !order_by.is_empty() {
1060+
write!(f, " ORDER BY {:?}", order_by)?;
1061+
}
1062+
if let Some(window_frame) = window_frame {
1063+
write!(
1064+
f,
1065+
" {} BETWEEN {} AND {}",
1066+
window_frame.units,
1067+
window_frame.start_bound,
1068+
window_frame.end_bound
1069+
)?;
1070+
}
1071+
Ok(())
1072+
}
1073+
Expr::AggregateFunction {
1074+
fun,
1075+
distinct,
1076+
ref args,
1077+
..
1078+
} => fmt_function(f, &fun.to_string(), *distinct, args, true),
1079+
Expr::AggregateUDF { fun, ref args, .. } => {
1080+
fmt_function(f, &fun.name, false, args, false)
1081+
}
1082+
Expr::Between {
1083+
expr,
1084+
negated,
1085+
low,
1086+
high,
1087+
} => {
1088+
if *negated {
1089+
write!(f, "{:?} NOT BETWEEN {:?} AND {:?}", expr, low, high)
1090+
} else {
1091+
write!(f, "{:?} BETWEEN {:?} AND {:?}", expr, low, high)
1092+
}
1093+
}
1094+
Expr::InList {
1095+
expr,
1096+
list,
1097+
negated,
1098+
} => {
1099+
if *negated {
1100+
write!(f, "{:?} NOT IN ({:?})", expr, list)
1101+
} else {
1102+
write!(f, "{:?} IN ({:?})", expr, list)
1103+
}
1104+
}
1105+
Expr::Wildcard => write!(f, "*"),
1106+
Expr::GetIndexedField { ref expr, key } => {
1107+
write!(f, "({:?})[{}]", expr, key)
1108+
}
1109+
}
1110+
}
1111+
}
1112+
1113+
/// Returns a readable name of an expression based on the input schema.
1114+
/// This function recursively transverses the expression for names such as "CAST(a > 2)".
1115+
fn create_name(e: &Expr, input_schema: &DFSchema) -> Result<String> {
1116+
match e {
1117+
Expr::Alias(_, name) => Ok(name.clone()),
1118+
Expr::Column(c) => Ok(c.flat_name()),
1119+
Expr::ScalarVariable(variable_names) => Ok(variable_names.join(".")),
1120+
Expr::Literal(value) => Ok(format!("{:?}", value)),
1121+
Expr::BinaryExpr { left, op, right } => {
1122+
let left = create_name(left, input_schema)?;
1123+
let right = create_name(right, input_schema)?;
1124+
Ok(format!("{} {} {}", left, op, right))
1125+
}
1126+
Expr::Case {
1127+
expr,
1128+
when_then_expr,
1129+
else_expr,
1130+
} => {
1131+
let mut name = "CASE ".to_string();
1132+
if let Some(e) = expr {
1133+
let e = create_name(e, input_schema)?;
1134+
name += &format!("{} ", e);
1135+
}
1136+
for (w, t) in when_then_expr {
1137+
let when = create_name(w, input_schema)?;
1138+
let then = create_name(t, input_schema)?;
1139+
name += &format!("WHEN {} THEN {} ", when, then);
1140+
}
1141+
if let Some(e) = else_expr {
1142+
let e = create_name(e, input_schema)?;
1143+
name += &format!("ELSE {} ", e);
1144+
}
1145+
name += "END";
1146+
Ok(name)
1147+
}
1148+
Expr::Cast { expr, data_type } => {
1149+
let expr = create_name(expr, input_schema)?;
1150+
Ok(format!("CAST({} AS {:?})", expr, data_type))
1151+
}
1152+
Expr::TryCast { expr, data_type } => {
1153+
let expr = create_name(expr, input_schema)?;
1154+
Ok(format!("TRY_CAST({} AS {:?})", expr, data_type))
1155+
}
1156+
Expr::Not(expr) => {
1157+
let expr = create_name(expr, input_schema)?;
1158+
Ok(format!("NOT {}", expr))
1159+
}
1160+
Expr::Negative(expr) => {
1161+
let expr = create_name(expr, input_schema)?;
1162+
Ok(format!("(- {})", expr))
1163+
}
1164+
Expr::IsNull(expr) => {
1165+
let expr = create_name(expr, input_schema)?;
1166+
Ok(format!("{} IS NULL", expr))
1167+
}
1168+
Expr::IsNotNull(expr) => {
1169+
let expr = create_name(expr, input_schema)?;
1170+
Ok(format!("{} IS NOT NULL", expr))
1171+
}
1172+
Expr::GetIndexedField { expr, key } => {
1173+
let expr = create_name(expr, input_schema)?;
1174+
Ok(format!("{}[{}]", expr, key))
1175+
}
1176+
Expr::ScalarFunction { fun, args, .. } => {
1177+
create_function_name(&fun.to_string(), false, args, input_schema)
1178+
}
1179+
Expr::ScalarUDF { fun, args, .. } => {
1180+
create_function_name(&fun.name, false, args, input_schema)
1181+
}
1182+
Expr::WindowFunction {
1183+
fun,
1184+
args,
1185+
window_frame,
1186+
partition_by,
1187+
order_by,
1188+
} => {
1189+
let mut parts: Vec<String> = vec![create_function_name(
1190+
&fun.to_string(),
1191+
false,
1192+
args,
1193+
input_schema,
1194+
)?];
1195+
if !partition_by.is_empty() {
1196+
parts.push(format!("PARTITION BY {:?}", partition_by));
1197+
}
1198+
if !order_by.is_empty() {
1199+
parts.push(format!("ORDER BY {:?}", order_by));
1200+
}
1201+
if let Some(window_frame) = window_frame {
1202+
parts.push(format!("{}", window_frame));
1203+
}
1204+
Ok(parts.join(" "))
1205+
}
1206+
Expr::AggregateFunction {
1207+
fun,
1208+
distinct,
1209+
args,
1210+
..
1211+
} => create_function_name(&fun.to_string(), *distinct, args, input_schema),
1212+
Expr::AggregateUDF { fun, args } => {
1213+
let mut names = Vec::with_capacity(args.len());
1214+
for e in args {
1215+
names.push(create_name(e, input_schema)?);
1216+
}
1217+
Ok(format!("{}({})", fun.name, names.join(",")))
1218+
}
1219+
Expr::InList {
1220+
expr,
1221+
list,
1222+
negated,
1223+
} => {
1224+
let expr = create_name(expr, input_schema)?;
1225+
let list = list.iter().map(|expr| create_name(expr, input_schema));
1226+
if *negated {
1227+
Ok(format!("{} NOT IN ({:?})", expr, list))
1228+
} else {
1229+
Ok(format!("{} IN ({:?})", expr, list))
1230+
}
1231+
}
1232+
Expr::Between {
1233+
expr,
1234+
negated,
1235+
low,
1236+
high,
1237+
} => {
1238+
let expr = create_name(expr, input_schema)?;
1239+
let low = create_name(low, input_schema)?;
1240+
let high = create_name(high, input_schema)?;
1241+
if *negated {
1242+
Ok(format!("{} NOT BETWEEN {} AND {}", expr, low, high))
1243+
} else {
1244+
Ok(format!("{} BETWEEN {} AND {}", expr, low, high))
1245+
}
1246+
}
1247+
Expr::Sort { .. } => Err(DataFusionError::Internal(
1248+
"Create name does not support sort expression".to_string(),
1249+
)),
1250+
Expr::Wildcard => Err(DataFusionError::Internal(
1251+
"Create name does not support wildcard".to_string(),
1252+
)),
1253+
}
1254+
}
1255+
1256+
fn create_function_name(
1257+
fun: &str,
1258+
distinct: bool,
1259+
args: &[Expr],
1260+
input_schema: &DFSchema,
1261+
) -> Result<String> {
1262+
let names: Vec<String> = args
1263+
.iter()
1264+
.map(|e| create_name(e, input_schema))
1265+
.collect::<Result<_>>()?;
1266+
let distinct_str = match distinct {
1267+
true => "DISTINCT ",
1268+
false => "",
1269+
};
1270+
Ok(format!("{}({}{})", fun, distinct_str, names.join(",")))
1271+
}

0 commit comments

Comments
 (0)