1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ use std:: sync:: Arc ;
19+
20+ use arrow_schema:: TimeUnit ;
1821use regex:: Regex ;
19- use sqlparser:: { ast, keywords:: ALL_KEYWORDS } ;
22+ use sqlparser:: {
23+ ast:: { self , Ident , ObjectName , TimezoneInfo } ,
24+ keywords:: ALL_KEYWORDS ,
25+ } ;
2026
2127/// `Dialect` to use for Unparsing
2228///
@@ -36,8 +42,8 @@ pub trait Dialect: Send + Sync {
3642 true
3743 }
3844
39- // Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
40- // E.g. Trino, Athena and Dremio does not have DATETIME data type
45+ /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
46+ /// E.g. Trino, Athena and Dremio does not have DATETIME data type
4147 fn use_timestamp_for_date64 ( & self ) -> bool {
4248 false
4349 }
@@ -46,23 +52,50 @@ pub trait Dialect: Send + Sync {
4652 IntervalStyle :: PostgresVerbose
4753 }
4854
49- // Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
50- // E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
55+ /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
56+ /// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
5157 fn float64_ast_dtype ( & self ) -> sqlparser:: ast:: DataType {
5258 sqlparser:: ast:: DataType :: Double
5359 }
5460
55- // The SQL type to use for Arrow Utf8 unparsing
56- // Most dialects use VARCHAR, but some, like MySQL, require CHAR
61+ /// The SQL type to use for Arrow Utf8 unparsing
62+ /// Most dialects use VARCHAR, but some, like MySQL, require CHAR
5763 fn utf8_cast_dtype ( & self ) -> ast:: DataType {
5864 ast:: DataType :: Varchar ( None )
5965 }
6066
61- // The SQL type to use for Arrow LargeUtf8 unparsing
62- // Most dialects use TEXT, but some, like MySQL, require CHAR
67+ /// The SQL type to use for Arrow LargeUtf8 unparsing
68+ /// Most dialects use TEXT, but some, like MySQL, require CHAR
6369 fn large_utf8_cast_dtype ( & self ) -> ast:: DataType {
6470 ast:: DataType :: Text
6571 }
72+
73+ /// The date field extract style to use: `DateFieldExtractStyle`
74+ fn date_field_extract_style ( & self ) -> DateFieldExtractStyle {
75+ DateFieldExtractStyle :: DatePart
76+ }
77+
78+ /// The SQL type to use for Arrow Int64 unparsing
79+ /// Most dialects use BigInt, but some, like MySQL, require SIGNED
80+ fn int64_cast_dtype ( & self ) -> ast:: DataType {
81+ ast:: DataType :: BigInt ( None )
82+ }
83+
84+ /// The SQL type to use for Timestamp unparsing
85+ /// Most dialects use Timestamp, but some, like MySQL, require Datetime
86+ /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp
87+ fn timestamp_cast_dtype (
88+ & self ,
89+ _time_unit : & TimeUnit ,
90+ tz : & Option < Arc < str > > ,
91+ ) -> ast:: DataType {
92+ let tz_info = match tz {
93+ Some ( _) => TimezoneInfo :: WithTimeZone ,
94+ None => TimezoneInfo :: None ,
95+ } ;
96+
97+ ast:: DataType :: Timestamp ( None , tz_info)
98+ }
6699}
67100
68101/// `IntervalStyle` to use for unparsing
@@ -80,6 +113,19 @@ pub enum IntervalStyle {
80113 MySQL ,
81114}
82115
/// Datetime subfield extraction style for unparsing.
///
/// See <https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>.
///
/// Different DBMSs follow different standards; popular ones are:
/// - `date_part('YEAR', date '2001-02-16')`
/// - `EXTRACT(YEAR from date '2001-02-16')`
///
/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateFieldExtractStyle {
    /// Function-call form: `date_part('YEAR', ...)`.
    DatePart,
    /// Keyword form: `EXTRACT(YEAR from ...)`.
    Extract,
}
128+
/// Field-less dialect type; its `Dialect` implementation follows below.
pub struct DefaultDialect {}
84130
85131impl Dialect for DefaultDialect {
@@ -133,6 +179,22 @@ impl Dialect for MySqlDialect {
133179 fn large_utf8_cast_dtype ( & self ) -> ast:: DataType {
134180 ast:: DataType :: Char ( None )
135181 }
182+
183+ fn date_field_extract_style ( & self ) -> DateFieldExtractStyle {
184+ DateFieldExtractStyle :: Extract
185+ }
186+
187+ fn int64_cast_dtype ( & self ) -> ast:: DataType {
188+ ast:: DataType :: Custom ( ObjectName ( vec ! [ Ident :: new( "SIGNED" ) ] ) , vec ! [ ] )
189+ }
190+
191+ fn timestamp_cast_dtype (
192+ & self ,
193+ _time_unit : & TimeUnit ,
194+ _tz : & Option < Arc < str > > ,
195+ ) -> ast:: DataType {
196+ ast:: DataType :: Datetime ( None )
197+ }
136198}
137199
/// Field-less marker type for the SQLite dialect.
pub struct SqliteDialect {}
@@ -151,6 +213,10 @@ pub struct CustomDialect {
151213 float64_ast_dtype : sqlparser:: ast:: DataType ,
152214 utf8_cast_dtype : ast:: DataType ,
153215 large_utf8_cast_dtype : ast:: DataType ,
216+ date_field_extract_style : DateFieldExtractStyle ,
217+ int64_cast_dtype : ast:: DataType ,
218+ timestamp_cast_dtype : ast:: DataType ,
219+ timestamp_tz_cast_dtype : ast:: DataType ,
154220}
155221
156222impl Default for CustomDialect {
@@ -163,6 +229,13 @@ impl Default for CustomDialect {
163229 float64_ast_dtype : sqlparser:: ast:: DataType :: Double ,
164230 utf8_cast_dtype : ast:: DataType :: Varchar ( None ) ,
165231 large_utf8_cast_dtype : ast:: DataType :: Text ,
232+ date_field_extract_style : DateFieldExtractStyle :: DatePart ,
233+ int64_cast_dtype : ast:: DataType :: BigInt ( None ) ,
234+ timestamp_cast_dtype : ast:: DataType :: Timestamp ( None , TimezoneInfo :: None ) ,
235+ timestamp_tz_cast_dtype : ast:: DataType :: Timestamp (
236+ None ,
237+ TimezoneInfo :: WithTimeZone ,
238+ ) ,
166239 }
167240 }
168241}
@@ -206,6 +279,26 @@ impl Dialect for CustomDialect {
206279 fn large_utf8_cast_dtype ( & self ) -> ast:: DataType {
207280 self . large_utf8_cast_dtype . clone ( )
208281 }
282+
283+ fn date_field_extract_style ( & self ) -> DateFieldExtractStyle {
284+ self . date_field_extract_style
285+ }
286+
287+ fn int64_cast_dtype ( & self ) -> ast:: DataType {
288+ self . int64_cast_dtype . clone ( )
289+ }
290+
291+ fn timestamp_cast_dtype (
292+ & self ,
293+ _time_unit : & TimeUnit ,
294+ tz : & Option < Arc < str > > ,
295+ ) -> ast:: DataType {
296+ if tz. is_some ( ) {
297+ self . timestamp_tz_cast_dtype . clone ( )
298+ } else {
299+ self . timestamp_cast_dtype . clone ( )
300+ }
301+ }
209302}
210303
211304/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
@@ -230,6 +323,10 @@ pub struct CustomDialectBuilder {
230323 float64_ast_dtype : sqlparser:: ast:: DataType ,
231324 utf8_cast_dtype : ast:: DataType ,
232325 large_utf8_cast_dtype : ast:: DataType ,
326+ date_field_extract_style : DateFieldExtractStyle ,
327+ int64_cast_dtype : ast:: DataType ,
328+ timestamp_cast_dtype : ast:: DataType ,
329+ timestamp_tz_cast_dtype : ast:: DataType ,
233330}
234331
235332impl Default for CustomDialectBuilder {
@@ -248,6 +345,13 @@ impl CustomDialectBuilder {
248345 float64_ast_dtype : sqlparser:: ast:: DataType :: Double ,
249346 utf8_cast_dtype : ast:: DataType :: Varchar ( None ) ,
250347 large_utf8_cast_dtype : ast:: DataType :: Text ,
348+ date_field_extract_style : DateFieldExtractStyle :: DatePart ,
349+ int64_cast_dtype : ast:: DataType :: BigInt ( None ) ,
350+ timestamp_cast_dtype : ast:: DataType :: Timestamp ( None , TimezoneInfo :: None ) ,
351+ timestamp_tz_cast_dtype : ast:: DataType :: Timestamp (
352+ None ,
353+ TimezoneInfo :: WithTimeZone ,
354+ ) ,
251355 }
252356 }
253357
@@ -260,6 +364,10 @@ impl CustomDialectBuilder {
260364 float64_ast_dtype : self . float64_ast_dtype ,
261365 utf8_cast_dtype : self . utf8_cast_dtype ,
262366 large_utf8_cast_dtype : self . large_utf8_cast_dtype ,
367+ date_field_extract_style : self . date_field_extract_style ,
368+ int64_cast_dtype : self . int64_cast_dtype ,
369+ timestamp_cast_dtype : self . timestamp_cast_dtype ,
370+ timestamp_tz_cast_dtype : self . timestamp_tz_cast_dtype ,
263371 }
264372 }
265373
@@ -293,6 +401,7 @@ impl CustomDialectBuilder {
293401 self
294402 }
295403
404+ /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
296405 pub fn with_float64_ast_dtype (
297406 mut self ,
298407 float64_ast_dtype : sqlparser:: ast:: DataType ,
@@ -301,16 +410,44 @@ impl CustomDialectBuilder {
301410 self
302411 }
303412
413+ /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
304414 pub fn with_utf8_cast_dtype ( mut self , utf8_cast_dtype : ast:: DataType ) -> Self {
305415 self . utf8_cast_dtype = utf8_cast_dtype;
306416 self
307417 }
308418
419+ /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
309420 pub fn with_large_utf8_cast_dtype (
310421 mut self ,
311422 large_utf8_cast_dtype : ast:: DataType ,
312423 ) -> Self {
313424 self . large_utf8_cast_dtype = large_utf8_cast_dtype;
314425 self
315426 }
427+
428+ /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
429+ pub fn with_date_field_extract_style (
430+ mut self ,
431+ date_field_extract_style : DateFieldExtractStyle ,
432+ ) -> Self {
433+ self . date_field_extract_style = date_field_extract_style;
434+ self
435+ }
436+
437+ /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
438+ pub fn with_int64_cast_dtype ( mut self , int64_cast_dtype : ast:: DataType ) -> Self {
439+ self . int64_cast_dtype = int64_cast_dtype;
440+ self
441+ }
442+
443+ /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc.
444+ pub fn with_timestamp_cast_dtype (
445+ mut self ,
446+ timestamp_cast_dtype : ast:: DataType ,
447+ timestamp_tz_cast_dtype : ast:: DataType ,
448+ ) -> Self {
449+ self . timestamp_cast_dtype = timestamp_cast_dtype;
450+ self . timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
451+ self
452+ }
316453}
0 commit comments