Skip to content

Commit 09b107a

Browse files
committed
Add dialect param to use CHAR instead of TEXT for Utf8 unparsing for MySQL (#12)
1 parent 3c9a282 commit 09b107a

File tree

2 files changed

+62
-2
lines changed

2 files changed

+62
-2
lines changed

datafusion/sql/src/unparser/dialect.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ pub trait Dialect {
4545
// Default interval style used when unparsing: `IntervalStyle::PostgresVerbose`.
fn interval_style(&self) -> IntervalStyle {
4646
IntervalStyle::PostgresVerbose
4747
}
48+
49+
// Does the dialect use CHAR when casting to Utf8/LargeUtf8, rather than VARCHAR/TEXT?
50+
// E.g. MySQL requires CHAR instead of TEXT and automatically produces a string with
51+
// the VARCHAR, TEXT or LONGTEXT data type based on the length of the string.
52+
fn use_char_for_utf8_cast(&self) -> bool {
53+
false
54+
}
4855
}
4956

5057
/// `IntervalStyle` to use for unparsing
@@ -103,6 +110,10 @@ impl Dialect for MySqlDialect {
103110
// MySqlDialect unparses intervals using `IntervalStyle::MySQL`.
fn interval_style(&self) -> IntervalStyle {
104111
IntervalStyle::MySQL
105112
}
113+
114+
// MySqlDialect opts in: casts to Utf8/LargeUtf8 are unparsed with CHAR as the target type.
fn use_char_for_utf8_cast(&self) -> bool {
115+
true
116+
}
106117
}
107118

108119
pub struct SqliteDialect {}
@@ -118,6 +129,7 @@ pub struct CustomDialect {
118129
supports_nulls_first_in_sort: bool,
119130
use_timestamp_for_date64: bool,
120131
interval_style: IntervalStyle,
132+
use_char_for_utf8_cast: bool,
121133
}
122134

123135
impl Default for CustomDialect {
@@ -127,6 +139,7 @@ impl Default for CustomDialect {
127139
supports_nulls_first_in_sort: true,
128140
use_timestamp_for_date64: false,
129141
interval_style: IntervalStyle::SQLStandard,
142+
use_char_for_utf8_cast: false,
130143
}
131144
}
132145
}
@@ -157,6 +170,10 @@ impl Dialect for CustomDialect {
157170
// Returns the interval style stored on this CustomDialect.
fn interval_style(&self) -> IntervalStyle {
158171
self.interval_style
159172
}
173+
174+
// Returns the `use_char_for_utf8_cast` flag stored on this CustomDialect.
fn use_char_for_utf8_cast(&self) -> bool {
175+
self.use_char_for_utf8_cast
176+
}
160177
}
161178

162179
// create a CustomDialectBuilder
@@ -165,6 +182,7 @@ pub struct CustomDialectBuilder {
165182
supports_nulls_first_in_sort: bool,
166183
use_timestamp_for_date64: bool,
167184
interval_style: IntervalStyle,
185+
use_char_for_utf8_cast: bool,
168186
}
169187

170188
impl Default for CustomDialectBuilder {
@@ -180,6 +198,7 @@ impl CustomDialectBuilder {
180198
supports_nulls_first_in_sort: true,
181199
use_timestamp_for_date64: false,
182200
interval_style: IntervalStyle::PostgresVerbose,
201+
use_char_for_utf8_cast: false,
183202
}
184203
}
185204

@@ -189,6 +208,7 @@ impl CustomDialectBuilder {
189208
supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
190209
use_timestamp_for_date64: self.use_timestamp_for_date64,
191210
interval_style: self.interval_style,
211+
use_char_for_utf8_cast: self.use_char_for_utf8_cast,
192212
}
193213
}
194214

@@ -217,4 +237,9 @@ impl CustomDialectBuilder {
217237
self.interval_style = interval_style;
218238
self
219239
}
240+
241+
// Builder setter for the `use_char_for_utf8_cast` flag.
// Takes the builder by value and returns it so calls can be chained.
pub fn with_use_char_for_utf8_cast(mut self, use_char_for_utf8_cast: bool) -> Self {
242+
self.use_char_for_utf8_cast = use_char_for_utf8_cast;
243+
self
244+
}
220245
}

datafusion/sql/src/unparser/expr.rs

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1272,8 +1272,16 @@ impl Unparser<'_> {
12721272
DataType::BinaryView => {
12731273
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
12741274
}
1275-
DataType::Utf8 => Ok(ast::DataType::Varchar(None)),
1276-
DataType::LargeUtf8 => Ok(ast::DataType::Text),
1275+
DataType::Utf8 => Ok(if self.dialect.use_char_for_utf8_cast() {
1276+
ast::DataType::Char(None)
1277+
} else {
1278+
ast::DataType::Varchar(None)
1279+
}),
1280+
DataType::LargeUtf8 => Ok(if self.dialect.use_char_for_utf8_cast() {
1281+
ast::DataType::Char(None)
1282+
} else {
1283+
ast::DataType::Text
1284+
}),
12771285
DataType::Utf8View => {
12781286
not_impl_err!("Unsupported DataType: conversion: {data_type:?}")
12791287
}
@@ -1932,4 +1940,31 @@ mod tests {
19321940
assert_eq!(actual, expected);
19331941
}
19341942
}
1943+
1944+
// Checks that `with_use_char_for_utf8_cast` switches the unparsed cast target type:
// Utf8 -> VARCHAR and LargeUtf8 -> TEXT when the flag is false, CHAR for both when true.
#[test]
1945+
fn custom_dialect_use_char_for_utf8_cast() -> Result<()> {
1946+
for (use_char_for_utf8_cast, data_type, identifier) in [
1947+
(false, DataType::Utf8, "VARCHAR"),
1948+
(true, DataType::Utf8, "CHAR"),
1949+
(false, DataType::LargeUtf8, "TEXT"),
1950+
(true, DataType::LargeUtf8, "CHAR"),
1951+
] {
1952+
let dialect = CustomDialectBuilder::new()
1953+
.with_use_char_for_utf8_cast(use_char_for_utf8_cast)
1954+
.build();
1955+
let unparser = Unparser::new(&dialect);
1956+
1957+
let expr = Expr::Cast(Cast {
1958+
expr: Box::new(col("a")),
1959+
data_type,
1960+
});
1961+
let ast = unparser.expr_to_sql(&expr)?;
1962+
1963+
let actual = format!("{}", ast);
1964+
let expected = format!(r#"CAST(a AS {identifier})"#);
1965+
1966+
assert_eq!(actual, expected);
1967+
}
1968+
Ok(())
1969+
}
19351970
}

0 commit comments

Comments
 (0)