Skip to content

Commit f992066

Browse files
committed
Add dialect param to use CHAR instead of TEXT for Utf8 unparsing for MySQL (#12)
1 parent f11bdf0 commit f992066

File tree

2 files changed

+62
-2
lines changed

2 files changed

+62
-2
lines changed

datafusion/sql/src/unparser/dialect.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ pub trait Dialect {
4545
fn interval_style(&self) -> IntervalStyle {
4646
IntervalStyle::PostgresVerbose
4747
}
48+
49+
// Does the dialect use CHAR to cast Utf8 and LargeUtf8, rather than VARCHAR and TEXT respectively?
50+
// E.g. MySQL requires CHAR instead of VARCHAR or TEXT as a CAST target and automatically produces a string with
51+
// the VARCHAR, TEXT or LONGTEXT data type based on the length of the string
52+
fn use_char_for_utf8_cast(&self) -> bool {
53+
false
54+
}
4855
}
4956

5057
/// `IntervalStyle` to use for unparsing
@@ -103,6 +110,10 @@ impl Dialect for MySqlDialect {
103110
fn interval_style(&self) -> IntervalStyle {
104111
IntervalStyle::MySQL
105112
}
113+
114+
fn use_char_for_utf8_cast(&self) -> bool {
115+
true
116+
}
106117
}
107118

108119
pub struct SqliteDialect {}
@@ -118,6 +129,7 @@ pub struct CustomDialect {
118129
supports_nulls_first_in_sort: bool,
119130
use_timestamp_for_date64: bool,
120131
interval_style: IntervalStyle,
132+
use_char_for_utf8_cast: bool,
121133
}
122134

123135
impl Default for CustomDialect {
@@ -127,6 +139,7 @@ impl Default for CustomDialect {
127139
supports_nulls_first_in_sort: true,
128140
use_timestamp_for_date64: false,
129141
interval_style: IntervalStyle::SQLStandard,
142+
use_char_for_utf8_cast: false,
130143
}
131144
}
132145
}
@@ -158,6 +171,10 @@ impl Dialect for CustomDialect {
158171
fn interval_style(&self) -> IntervalStyle {
159172
self.interval_style
160173
}
174+
175+
fn use_char_for_utf8_cast(&self) -> bool {
176+
self.use_char_for_utf8_cast
177+
}
161178
}
162179

163180
/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
@@ -179,6 +196,7 @@ pub struct CustomDialectBuilder {
179196
supports_nulls_first_in_sort: bool,
180197
use_timestamp_for_date64: bool,
181198
interval_style: IntervalStyle,
199+
use_char_for_utf8_cast: bool,
182200
}
183201

184202
impl Default for CustomDialectBuilder {
@@ -194,6 +212,7 @@ impl CustomDialectBuilder {
194212
supports_nulls_first_in_sort: true,
195213
use_timestamp_for_date64: false,
196214
interval_style: IntervalStyle::PostgresVerbose,
215+
use_char_for_utf8_cast: false,
197216
}
198217
}
199218

@@ -203,6 +222,7 @@ impl CustomDialectBuilder {
203222
supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
204223
use_timestamp_for_date64: self.use_timestamp_for_date64,
205224
interval_style: self.interval_style,
225+
use_char_for_utf8_cast: self.use_char_for_utf8_cast,
206226
}
207227
}
208228

@@ -235,4 +255,9 @@ impl CustomDialectBuilder {
235255
self.interval_style = interval_style;
236256
self
237257
}
258+
259+
pub fn with_use_char_for_utf8_cast(mut self, use_char_for_utf8_cast: bool) -> Self {
260+
self.use_char_for_utf8_cast = use_char_for_utf8_cast;
261+
self
262+
}
238263
}

datafusion/sql/src/unparser/expr.rs

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1272,8 +1272,16 @@ impl Unparser<'_> {
12721272
DataType::BinaryView => {
12731273
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
12741274
}
1275-
DataType::Utf8 => Ok(ast::DataType::Varchar(None)),
1276-
DataType::LargeUtf8 => Ok(ast::DataType::Text),
1275+
DataType::Utf8 => Ok(if self.dialect.use_char_for_utf8_cast() {
1276+
ast::DataType::Char(None)
1277+
} else {
1278+
ast::DataType::Varchar(None)
1279+
}),
1280+
DataType::LargeUtf8 => Ok(if self.dialect.use_char_for_utf8_cast() {
1281+
ast::DataType::Char(None)
1282+
} else {
1283+
ast::DataType::Text
1284+
}),
12771285
DataType::Utf8View => {
12781286
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
12791287
}
@@ -1933,4 +1941,31 @@ mod tests {
19331941
assert_eq!(actual, expected);
19341942
}
19351943
}
1944+
1945+
#[test]
1946+
fn custom_dialect_use_char_for_utf8_cast() -> Result<()> {
1947+
for (use_char_for_utf8_cast, data_type, identifier) in [
1948+
(false, DataType::Utf8, "VARCHAR"),
1949+
(true, DataType::Utf8, "CHAR"),
1950+
(false, DataType::LargeUtf8, "TEXT"),
1951+
(true, DataType::LargeUtf8, "CHAR"),
1952+
] {
1953+
let dialect = CustomDialectBuilder::new()
1954+
.with_use_char_for_utf8_cast(use_char_for_utf8_cast)
1955+
.build();
1956+
let unparser = Unparser::new(&dialect);
1957+
1958+
let expr = Expr::Cast(Cast {
1959+
expr: Box::new(col("a")),
1960+
data_type,
1961+
});
1962+
let ast = unparser.expr_to_sql(&expr)?;
1963+
1964+
let actual = format!("{}", ast);
1965+
let expected = format!(r#"CAST(a AS {identifier})"#);
1966+
1967+
assert_eq!(actual, expected);
1968+
}
1969+
Ok(())
1970+
}
19361971
}

0 commit comments

Comments
 (0)