Skip to content

Commit e381bf6

Browse files
iffyio authored and lustefaniak committed
Support for BigQuery struct, array, bytes, int64, and float64 datatypes (apache#1003)
1 parent 8302d04 commit e381bf6

File tree

8 files changed

+910
-76
lines changed

8 files changed

+910
-76
lines changed

src/ast/data_type.rs

Lines changed: 55 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize};
2020
#[cfg(feature = "visitor")]
2121
use sqlparser_derive::{Visit, VisitMut};
2222

23-
use crate::ast::ObjectName;
23+
use crate::ast::{display_comma_separated, ObjectName, StructField};
2424

2525
use super::value::escape_single_quote_string;
2626

@@ -71,6 +71,10 @@ pub enum DataType {
7171
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type
7272
/// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html
7373
Blob(Option<u64>),
74+
/// Variable-length binary data with optional length.
75+
///
76+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#bytes_type
77+
Bytes(Option<u64>),
7478
/// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1]
7579
///
7680
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
@@ -125,6 +129,10 @@ pub enum DataType {
125129
///
126130
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
127131
Int4(Option<u64>),
132+
/// Integer type in [bigquery]
133+
///
134+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
135+
Int64,
128136
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
129137
Integer(Option<u64>),
130138
/// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
@@ -149,6 +157,10 @@ pub enum DataType {
149157
///
150158
/// [postgresql]: https://www.postgresql.org/docs/15/datatype.html
151159
Float4,
160+
/// Floating point in [bigquery]
161+
///
162+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
163+
Float64,
152164
/// Floating point e.g. REAL
153165
Real,
154166
/// Float8 as alias for Double in [postgresql]
@@ -190,18 +202,23 @@ pub enum DataType {
190202
Regclass,
191203
/// Text
192204
Text,
193-
/// String
194-
String,
205+
/// String with optional length.
206+
String(Option<u64>),
195207
/// Bytea
196208
Bytea,
197209
/// Custom type such as enums
198210
Custom(ObjectName, Vec<String>),
199211
/// Arrays
200-
Array(Option<Box<DataType>>),
212+
Array(ArrayElemTypeDef),
201213
/// Enums
202214
Enum(Vec<String>),
203215
/// Set
204216
Set(Vec<String>),
217+
/// Struct type, as supported by [hive] and [bigquery]
218+
///
219+
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
220+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
221+
Struct(Vec<StructField>),
205222
}
206223

207224
impl fmt::Display for DataType {
@@ -231,6 +248,7 @@ impl fmt::Display for DataType {
231248
format_type_with_optional_length(f, "VARBINARY", size, false)
232249
}
233250
DataType::Blob(size) => format_type_with_optional_length(f, "BLOB", size, false),
251+
DataType::Bytes(size) => format_type_with_optional_length(f, "BYTES", size, false),
234252
DataType::Numeric(info) => {
235253
write!(f, "NUMERIC{info}")
236254
}
@@ -274,6 +292,9 @@ impl fmt::Display for DataType {
274292
DataType::Int4(zerofill) => {
275293
format_type_with_optional_length(f, "INT4", zerofill, false)
276294
}
295+
DataType::Int64 => {
296+
write!(f, "INT64")
297+
}
277298
DataType::UnsignedInt4(zerofill) => {
278299
format_type_with_optional_length(f, "INT4", zerofill, true)
279300
}
@@ -297,6 +318,7 @@ impl fmt::Display for DataType {
297318
}
298319
DataType::Real => write!(f, "REAL"),
299320
DataType::Float4 => write!(f, "FLOAT4"),
321+
DataType::Float64 => write!(f, "FLOAT64"),
300322
DataType::Double => write!(f, "DOUBLE"),
301323
DataType::Float8 => write!(f, "FLOAT8"),
302324
DataType::DoublePrecision => write!(f, "DOUBLE PRECISION"),
@@ -316,15 +338,13 @@ impl fmt::Display for DataType {
316338
DataType::JSON => write!(f, "JSON"),
317339
DataType::Regclass => write!(f, "REGCLASS"),
318340
DataType::Text => write!(f, "TEXT"),
319-
DataType::String => write!(f, "STRING"),
341+
DataType::String(size) => format_type_with_optional_length(f, "STRING", size, false),
320342
DataType::Bytea => write!(f, "BYTEA"),
321-
DataType::Array(ty) => {
322-
if let Some(t) = &ty {
323-
write!(f, "{t}[]")
324-
} else {
325-
write!(f, "ARRAY")
326-
}
327-
}
343+
DataType::Array(ty) => match ty {
344+
ArrayElemTypeDef::None => write!(f, "ARRAY"),
345+
ArrayElemTypeDef::SquareBracket(t) => write!(f, "{t}[]"),
346+
ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"),
347+
},
328348
DataType::Custom(ty, modifiers) => {
329349
if modifiers.is_empty() {
330350
write!(f, "{ty}")
@@ -352,6 +372,13 @@ impl fmt::Display for DataType {
352372
}
353373
write!(f, ")")
354374
}
375+
DataType::Struct(fields) => {
376+
if !fields.is_empty() {
377+
write!(f, "STRUCT<{}>", display_comma_separated(fields))
378+
} else {
379+
write!(f, "STRUCT")
380+
}
381+
}
355382
}
356383
}
357384
}
@@ -544,3 +571,19 @@ impl fmt::Display for CharLengthUnits {
544571
}
545572
}
546573
}
574+
575+
/// Represents the data type of the elements in an array (if any) as well as
576+
/// the syntax used to declare the array.
577+
///
578+
/// For example: BigQuery/Hive use `ARRAY<INT>` whereas Snowflake uses `ARRAY`.
579+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
580+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
581+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
582+
pub enum ArrayElemTypeDef {
583+
/// `ARRAY`
584+
None,
585+
/// `ARRAY<INT>`
586+
AngleBracket(Box<DataType>),
587+
/// `INT[]`
588+
SquareBracket(Box<DataType>),
589+
}

src/ast/mod.rs

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use serde::{Deserialize, Serialize};
2929
use sqlparser_derive::{Visit, VisitMut};
3030

3131
pub use self::data_type::{
32-
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
32+
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
3333
};
3434
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
3535
pub use self::ddl::{
@@ -360,6 +360,27 @@ impl fmt::Display for JsonOperator {
360360
}
361361
}
362362

363+
/// A field definition within a struct.
364+
///
365+
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
366+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
367+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
368+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
369+
pub struct StructField {
370+
pub field_name: Option<WithSpan<Ident>>,
371+
pub field_type: DataType,
372+
}
373+
374+
impl fmt::Display for StructField {
375+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
376+
if let Some(name) = &self.field_name {
377+
write!(f, "{name} {}", self.field_type)
378+
} else {
379+
write!(f, "{}", self.field_type)
380+
}
381+
}
382+
}
383+
363384
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
364385
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
365386
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
@@ -698,6 +719,29 @@ pub enum Expr {
698719
Rollup(Vec<Vec<Expr>>),
699720
/// ROW / TUPLE a single value, such as `SELECT (1, 2)`
700721
Tuple(Vec<Expr>),
722+
/// `BigQuery` specific `Struct` literal expression [1]
723+
/// Syntax:
724+
/// ```sql
725+
/// STRUCT<[field_name] field_type, ...>( expr1 [, ... ])
726+
/// ```
727+
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
728+
Struct {
729+
/// Struct values.
730+
values: Vec<Expr>,
731+
/// Struct field definitions.
732+
fields: Vec<StructField>,
733+
},
734+
/// `BigQuery` specific: A named expression in a typeless struct [1]
735+
///
736+
/// Syntax
737+
/// ```sql
738+
/// 1 AS A
739+
/// ```
740+
/// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
741+
Named {
742+
expr: Box<Expr>,
743+
name: WithSpan<Ident>,
744+
},
701745
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
702746
ArrayIndex { obj: Box<Expr>, indexes: Vec<Expr> },
703747
/// An array expression e.g. `ARRAY[1, 2]`
@@ -1093,6 +1137,21 @@ impl fmt::Display for Expr {
10931137
Expr::Tuple(exprs) => {
10941138
write!(f, "({})", display_comma_separated(exprs))
10951139
}
1140+
Expr::Struct { values, fields } => {
1141+
if !fields.is_empty() {
1142+
write!(
1143+
f,
1144+
"STRUCT<{}>({})",
1145+
display_comma_separated(fields),
1146+
display_comma_separated(values)
1147+
)
1148+
} else {
1149+
write!(f, "STRUCT({})", display_comma_separated(values))
1150+
}
1151+
}
1152+
Expr::Named { expr, name } => {
1153+
write!(f, "{} AS {}", expr, name)
1154+
}
10961155
Expr::ArrayIndex { obj, indexes } => {
10971156
write!(f, "{obj}")?;
10981157
for i in indexes {

src/keywords.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ define_keywords!(
122122
BY,
123123
BYPASSRLS,
124124
BYTEA,
125+
BYTES,
125126
CACHE,
126127
CALL,
127128
CALLED,
@@ -274,6 +275,7 @@ define_keywords!(
274275
FIRST_VALUE,
275276
FLOAT,
276277
FLOAT4,
278+
FLOAT64,
277279
FLOAT8,
278280
FLOOR,
279281
FOLLOWING,
@@ -297,6 +299,7 @@ define_keywords!(
297299
FUSION,
298300
GENERATE,
299301
GENERATED,
302+
GEOGRAPHY,
300303
GET,
301304
GLOBAL,
302305
GRANT,
@@ -333,6 +336,7 @@ define_keywords!(
333336
INT,
334337
INT2,
335338
INT4,
339+
INT64,
336340
INT8,
337341
INTEGER,
338342
INTERSECT,
@@ -591,6 +595,7 @@ define_keywords!(
591595
STORED,
592596
STRICT,
593597
STRING,
598+
STRUCT,
594599
SUBMULTISET,
595600
SUBSTRING,
596601
SUBSTRING_REGEX,

0 commit comments

Comments
 (0)