1616// under the License.
1717
1818use regex:: Regex ;
19- use sqlparser:: keywords:: ALL_KEYWORDS ;
19+ use sqlparser:: { ast , keywords:: ALL_KEYWORDS } ;
2020
2121/// `Dialect` to use for Unparsing
2222///
@@ -45,6 +45,17 @@ pub trait Dialect {
4545 fn interval_style ( & self ) -> IntervalStyle {
4646 IntervalStyle :: PostgresVerbose
4747 }
48+
49+ // The SQL type to use for Arrow Utf8 unparsing
50+ // Most dialects use VARCHAR, but some, like MySQL, require CHAR
51+ fn utf8_cast_dtype ( & self ) -> ast:: DataType {
52+ ast:: DataType :: Varchar ( None )
53+ }
54+ // The SQL type to use for Arrow LargeUtf8 unparsing
55+ // Most dialects use TEXT, but some, like MySQL, require CHAR
56+ fn large_utf8_cast_dtype ( & self ) -> ast:: DataType {
57+ ast:: DataType :: Text
58+ }
4859}
4960
5061/// `IntervalStyle` to use for unparsing
@@ -103,6 +114,14 @@ impl Dialect for MySqlDialect {
103114 fn interval_style ( & self ) -> IntervalStyle {
104115 IntervalStyle :: MySQL
105116 }
117+
118+ fn utf8_cast_dtype ( & self ) -> ast:: DataType {
119+ ast:: DataType :: Char ( None )
120+ }
121+
122+ fn large_utf8_cast_dtype ( & self ) -> ast:: DataType {
123+ ast:: DataType :: Char ( None )
124+ }
106125}
107126
108127pub struct SqliteDialect { }
@@ -118,6 +137,8 @@ pub struct CustomDialect {
118137 supports_nulls_first_in_sort : bool ,
119138 use_timestamp_for_date64 : bool ,
120139 interval_style : IntervalStyle ,
140+ utf8_cast_dtype : ast:: DataType ,
141+ large_utf8_cast_dtype : ast:: DataType ,
121142}
122143
123144impl Default for CustomDialect {
@@ -127,6 +148,8 @@ impl Default for CustomDialect {
127148 supports_nulls_first_in_sort : true ,
128149 use_timestamp_for_date64 : false ,
129150 interval_style : IntervalStyle :: SQLStandard ,
151+ utf8_cast_dtype : ast:: DataType :: Varchar ( None ) ,
152+ large_utf8_cast_dtype : ast:: DataType :: Text ,
130153 }
131154 }
132155}
@@ -158,6 +181,14 @@ impl Dialect for CustomDialect {
158181 fn interval_style ( & self ) -> IntervalStyle {
159182 self . interval_style
160183 }
184+
185+ fn utf8_cast_dtype ( & self ) -> ast:: DataType {
186+ self . utf8_cast_dtype . clone ( )
187+ }
188+
189+ fn large_utf8_cast_dtype ( & self ) -> ast:: DataType {
190+ self . large_utf8_cast_dtype . clone ( )
191+ }
161192}
162193
163194/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
@@ -179,6 +210,8 @@ pub struct CustomDialectBuilder {
179210 supports_nulls_first_in_sort : bool ,
180211 use_timestamp_for_date64 : bool ,
181212 interval_style : IntervalStyle ,
213+ utf8_cast_dtype : ast:: DataType ,
214+ large_utf8_cast_dtype : ast:: DataType ,
182215}
183216
184217impl Default for CustomDialectBuilder {
@@ -194,6 +227,8 @@ impl CustomDialectBuilder {
194227 supports_nulls_first_in_sort : true ,
195228 use_timestamp_for_date64 : false ,
196229 interval_style : IntervalStyle :: PostgresVerbose ,
230+ utf8_cast_dtype : ast:: DataType :: Varchar ( None ) ,
231+ large_utf8_cast_dtype : ast:: DataType :: Text ,
197232 }
198233 }
199234
@@ -203,6 +238,8 @@ impl CustomDialectBuilder {
203238 supports_nulls_first_in_sort : self . supports_nulls_first_in_sort ,
204239 use_timestamp_for_date64 : self . use_timestamp_for_date64 ,
205240 interval_style : self . interval_style ,
241+ utf8_cast_dtype : self . utf8_cast_dtype ,
242+ large_utf8_cast_dtype : self . large_utf8_cast_dtype ,
206243 }
207244 }
208245
@@ -235,4 +272,17 @@ impl CustomDialectBuilder {
235272 self . interval_style = interval_style;
236273 self
237274 }
275+
276+ pub fn with_utf8_cast_dtype ( mut self , utf8_cast_dtype : ast:: DataType ) -> Self {
277+ self . utf8_cast_dtype = utf8_cast_dtype;
278+ self
279+ }
280+
281+ pub fn with_large_utf8_cast_dtype (
282+ mut self ,
283+ large_utf8_cast_dtype : ast:: DataType ,
284+ ) -> Self {
285+ self . large_utf8_cast_dtype = large_utf8_cast_dtype;
286+ self
287+ }
238288}
0 commit comments