Skip to content

Commit 2f437db

Browse files
authored
Support for BigQuery struct, array and bytes , int64, float64 datatypes (#1003)
1 parent 65317ed commit 2f437db

File tree

8 files changed

+901
-65
lines changed

8 files changed

+901
-65
lines changed

src/ast/data_type.rs

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
2020
#[cfg(feature = "visitor")]
2121
use sqlparser_derive::{Visit, VisitMut};
2222

23-
use crate::ast::ObjectName;
23+
use crate::ast::{display_comma_separated, ObjectName, StructField};
2424

2525
use super::value::escape_single_quote_string;
2626

@@ -71,6 +71,10 @@ pub enum DataType {
7171
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type
7272
/// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html
7373
Blob(Option<u64>),
74+
/// Variable-length binary data with optional length.
75+
///
76+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type
77+
Bytes(Option<u64>),
7478
/// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1]
7579
///
7680
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
@@ -125,6 +129,10 @@ pub enum DataType {
125129
///
126130
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
127131
Int4(Option<u64>),
132+
/// Integer type in [bigquery]
133+
///
134+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
135+
Int64,
128136
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
129137
Integer(Option<u64>),
130138
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
@@ -149,6 +157,10 @@ pub enum DataType {
149157
///
150158
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
151159
Float4,
160+
/// Floating point in [bigquery]
161+
///
162+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
163+
Float64,
152164
/// Floating point e.g. REAL
153165
Real,
154166
/// Float8 as alias for Double in [postgresql]
@@ -190,18 +202,23 @@ pub enum DataType {
190202
Regclass,
191203
/// Text
192204
Text,
193-
/// String
194-
String,
205+
/// String with optional length.
206+
String(Option<u64>),
195207
/// Bytea
196208
Bytea,
197209
/// Custom type such as enums
198210
Custom(ObjectName, Vec<String>),
199211
/// Arrays
200-
Array(Option<Box<DataType>>),
212+
Array(ArrayElemTypeDef),
201213
/// Enums
202214
Enum(Vec<String>),
203215
/// Set
204216
Set(Vec<String>),
217+
/// Struct type, as supported by [hive] and [bigquery]
218+
///
219+
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
220+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
221+
Struct(Vec<StructField>),
205222
}
206223

207224
impl fmt::Display for DataType {
@@ -231,6 +248,7 @@ impl fmt::Display for DataType {
231248
format_type_with_optional_length(f, "VARBINARY", size, false)
232249
}
233250
DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false),
251+
DataType::Bytes(size) => format_type_with_optional_length(f, "BYTES", size, false),
234252
DataType::Numeric(info) => {
235253
write!(f, "NUMERIC{info}")
236254
}
@@ -274,6 +292,9 @@ impl fmt::Display for DataType {
274292
DataType::Int4(zerofill) => {
275293
format_type_with_optional_length(f, "INT4", zerofill, false)
276294
}
295+
DataType::Int64 => {
296+
write!(f, "INT64")
297+
}
277298
DataType::UnsignedInt4(zerofill) => {
278299
format_type_with_optional_length(f, "INT4", zerofill, true)
279300
}
@@ -297,6 +318,7 @@ impl fmt::Display for DataType {
297318
}
298319
DataType::Real => write!(f, "REAL"),
299320
DataType::Float4 => write!(f, "FLOAT4"),
321+
DataType::Float64 => write!(f, "FLOAT64"),
300322
DataType::Double => write!(f, "DOUBLE"),
301323
DataType::Float8 => write!(f, "FLOAT8"),
302324
DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
@@ -316,15 +338,13 @@ impl fmt::Display for DataType {
316338
DataType::JSON => write!(f, "JSON"),
317339
DataType::Regclass => write!(f, "REGCLASS"),
318340
DataType::Text => write!(f, "TEXT"),
319-
DataType::String => write!(f, "STRING"),
341+
DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false),
320342
DataType::Bytea => write!(f, "BYTEA"),
321-
DataType::Array(ty) => {
322-
if let Some(t) = &ty {
323-
write!(f, "{t}[]")
324-
} else {
325-
write!(f, "ARRAY")
326-
}
327-
}
343+
DataType::Array(ty) => match ty {
344+
ArrayElemTypeDef::None => write!(f, "ARRAY"),
345+
ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"),
346+
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
347+
},
328348
DataType::Custom(ty, modifiers) => {
329349
if modifiers.is_empty() {
330350
write!(f, "{ty}")
@@ -352,6 +372,13 @@ impl fmt::Display for DataType {
352372
}
353373
write!(f, ")")
354374
}
375+
DataType::Struct(fields) => {
376+
if !fields.is_empty() {
377+
write!(f, "STRUCT<{}>", display_comma_separated(fields))
378+
} else {
379+
write!(f, "STRUCT")
380+
}
381+
}
355382
}
356383
}
357384
}
@@ -533,3 +560,19 @@ impl fmt::Display for CharLengthUnits {
533560
}
534561
}
535562
}
563+
564+
/// Represents the data type of the elements in an array (if any) as well as
565+
/// the syntax used to declare the array.
566+
///
567+
/// For example: BigQuery/Hive use `ARRAY<INT>` whereas Snowflake uses `ARRAY`.
568+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
569+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
570+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
571+
pub enum ArrayElemTypeDef {
572+
/// `ARRAY`
573+
None,
574+
/// `ARRAY<INT>`
575+
AngleBracket(Box<DataType>),
576+
/// `INT[]`
577+
SquareBracket(Box<DataType>),
578+
}

src/ast/mod.rs

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use serde::{Deserialize, Serialize};
2626
use sqlparser_derive::{Visit, VisitMut};
2727

2828
pub use self::data_type::{
29-
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
29+
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
3030
};
3131
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
3232
pub use self::ddl::{
@@ -323,6 +323,27 @@ impl fmt::Display for JsonOperator {
323323
}
324324
}
325325

326+
/// A field definition within a struct.
327+
///
328+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
329+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
330+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
331+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
332+
pub struct StructField {
333+
pub field_name: Option<Ident>,
334+
pub field_type: DataType,
335+
}
336+
337+
impl fmt::Display for StructField {
338+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
339+
if let Some(name) = &self.field_name {
340+
write!(f, "{name} {}", self.field_type)
341+
} else {
342+
write!(f, "{}", self.field_type)
343+
}
344+
}
345+
}
346+
326347
/// Options for `CAST` / `TRY_CAST`
327348
/// BigQuery: <https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax>
328349
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
@@ -597,6 +618,26 @@ pub enum Expr {
597618
Rollup(Vec<Vec<Expr>>),
598619
/// ROW / TUPLE a single value, such as `SELECT (1, 2)`
599620
Tuple(Vec<Expr>),
621+
/// `BigQuery` specific `Struct` literal expression [1]
622+
/// Syntax:
623+
/// ```sql
624+
/// STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
625+
/// ```
626+
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
627+
Struct {
628+
/// Struct values.
629+
values: Vec<Expr>,
630+
/// Struct field definitions.
631+
fields: Vec<StructField>,
632+
},
633+
/// `BigQuery` specific: A named expression in a typeless struct [1]
634+
///
635+
/// Syntax
636+
/// ```sql
637+
/// 1 AS A
638+
/// ```
639+
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
640+
Named { expr: Box<Expr>, name: Ident },
600641
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
601642
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
602643
/// An array expression e.g. `ARRAY[1, 2]`
@@ -997,6 +1038,21 @@ impl fmt::Display for Expr {
9971038
Expr::Tuple(exprs) => {
9981039
write!(f, "({})", display_comma_separated(exprs))
9991040
}
1041+
Expr::Struct { values, fields } => {
1042+
if !fields.is_empty() {
1043+
write!(
1044+
f,
1045+
"STRUCT<{}>({})",
1046+
display_comma_separated(fields),
1047+
display_comma_separated(values)
1048+
)
1049+
} else {
1050+
write!(f, "STRUCT({})", display_comma_separated(values))
1051+
}
1052+
}
1053+
Expr::Named { expr, name } => {
1054+
write!(f, "{} AS {}", expr, name)
1055+
}
10001056
Expr::ArrayIndex { obj, indexes } => {
10011057
write!(f, "{obj}")?;
10021058
for i in indexes {

src/keywords.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ define_keywords!(
120120
BY,
121121
BYPASSRLS,
122122
BYTEA,
123+
BYTES,
123124
CACHE,
124125
CALL,
125126
CALLED,
@@ -270,6 +271,7 @@ define_keywords!(
270271
FIRST_VALUE,
271272
FLOAT,
272273
FLOAT4,
274+
FLOAT64,
273275
FLOAT8,
274276
FLOOR,
275277
FOLLOWING,
@@ -293,6 +295,7 @@ define_keywords!(
293295
FUSION,
294296
GENERATE,
295297
GENERATED,
298+
GEOGRAPHY,
296299
GET,
297300
GLOBAL,
298301
GRANT,
@@ -328,6 +331,7 @@ define_keywords!(
328331
INT,
329332
INT2,
330333
INT4,
334+
INT64,
331335
INT8,
332336
INTEGER,
333337
INTERSECT,
@@ -584,6 +588,7 @@ define_keywords!(
584588
STORED,
585589
STRICT,
586590
STRING,
591+
STRUCT,
587592
SUBMULTISET,
588593
SUBSTRING,
589594
SUBSTRING_REGEX,

0 commit comments

Comments
 (0)