Skip to content

Commit ca9c322

Browse files
Lordworms and alamb authored
adding config to control Varchar behavior (apache#11090)
* adding config to control Varchar behavior
* fix failed tests
* fix config_md
* format md
* optimize code
* format md
* format md
* adding config
* Tweak documentation
* Update sqllogictest
* tweaks strings

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 3bd7200 commit ca9c322

File tree

7 files changed

+69
-1
lines changed

7 files changed

+69
-1
lines changed

datafusion/common/src/config.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,11 @@ config_namespace! {
204204
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
205205
pub dialect: String, default = "generic".to_string()
206206

207+
/// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
208+
/// ignore the length. If false, error if a `VARCHAR` with a length is
209+
/// specified. The Arrow type system does not have a notion of maximum
210+
/// string length and thus DataFusion can not enforce such limits.
211+
pub support_varchar_with_length: bool, default = true
207212
}
208213
}
209214

@@ -303,6 +308,7 @@ config_namespace! {
303308
/// statistics into the same file groups.
304309
/// Currently experimental
305310
pub split_file_groups_by_statistics: bool, default = false
311+
306312
}
307313
}
308314

datafusion/core/src/execution/session_state.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@ impl SessionState {
615615
ParserOptions {
616616
parse_float_as_decimal: sql_parser_options.parse_float_as_decimal,
617617
enable_ident_normalization: sql_parser_options.enable_ident_normalization,
618+
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
618619
}
619620
}
620621

datafusion/sql/src/planner.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,15 @@ pub trait ContextProvider {
103103
pub struct ParserOptions {
104104
pub parse_float_as_decimal: bool,
105105
pub enable_ident_normalization: bool,
106+
pub support_varchar_with_length: bool,
106107
}
107108

108109
impl Default for ParserOptions {
109110
fn default() -> Self {
110111
Self {
111112
parse_float_as_decimal: false,
112113
enable_ident_normalization: true,
114+
support_varchar_with_length: true,
113115
}
114116
}
115117
}
@@ -404,12 +406,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
404406
SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) | SQLDataType::UnsignedInt4(_) => {
405407
Ok(DataType::UInt32)
406408
}
409+
SQLDataType::Varchar(length) => {
410+
match (length, self.options.support_varchar_with_length) {
411+
(Some(_), false) => plan_err!("does not support Varchar with length, please set `support_varchar_with_length` to be true"),
412+
_ => Ok(DataType::Utf8),
413+
}
414+
}
407415
SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => Ok(DataType::UInt64),
408416
SQLDataType::Float(_) => Ok(DataType::Float32),
409417
SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
410418
SQLDataType::Double | SQLDataType::DoublePrecision | SQLDataType::Float8 => Ok(DataType::Float64),
411419
SQLDataType::Char(_)
412-
| SQLDataType::Varchar(_)
413420
| SQLDataType::Text
414421
| SQLDataType::String(_) => Ok(DataType::Utf8),
415422
SQLDataType::Timestamp(None, tz_info) => {

datafusion/sql/tests/sql_integration.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ fn parse_decimals() {
8484
ParserOptions {
8585
parse_float_as_decimal: true,
8686
enable_ident_normalization: false,
87+
support_varchar_with_length: false,
8788
},
8889
);
8990
}
@@ -137,6 +138,7 @@ fn parse_ident_normalization() {
137138
ParserOptions {
138139
parse_float_as_decimal: false,
139140
enable_ident_normalization,
141+
support_varchar_with_length: false,
140142
},
141143
);
142144
if plan.is_ok() {

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ datafusion.optimizer.top_down_join_key_reordering true
237237
datafusion.sql_parser.dialect generic
238238
datafusion.sql_parser.enable_ident_normalization true
239239
datafusion.sql_parser.parse_float_as_decimal false
240+
datafusion.sql_parser.support_varchar_with_length true
240241

241242
# show all variables with verbose
242243
query TTT rowsort
@@ -318,6 +319,7 @@ datafusion.optimizer.top_down_join_key_reordering true When set to true, the phy
318319
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
319320
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
320321
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
322+
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.
321323

322324
# show_variable_in_config_options
323325
query TT

datafusion/sqllogictest/test_files/strings.slt

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,52 @@ e1
7878
p2
7979
p2e1
8080
p2m1e1
81+
82+
## VARCHAR with length support
83+
84+
# Lengths can be used by default
85+
query T
86+
SELECT '12345'::VARCHAR(2);
87+
----
88+
12345
89+
90+
# Lengths can not be used when the config setting is disabled
91+
92+
statement ok
93+
set datafusion.sql_parser.support_varchar_with_length = false;
94+
95+
query error
96+
SELECT '12345'::VARCHAR(2);
97+
98+
query error
99+
SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s);
100+
101+
statement ok
102+
create table vals(s char) as values('abc'), ('def');
103+
104+
query error
105+
SELECT s::VARCHAR(2) FROM vals
106+
107+
# Lengths can be used when the config setting is enabled
108+
109+
statement ok
110+
set datafusion.sql_parser.support_varchar_with_length = true;
111+
112+
query T
113+
SELECT '12345'::VARCHAR(2)
114+
----
115+
12345
116+
117+
query T
118+
SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s)
119+
----
120+
12345
121+
122+
query T
123+
SELECT s::VARCHAR(2) FROM vals
124+
----
125+
abc
126+
def
127+
128+
statement ok
129+
drop table vals;

docs/source/user-guide/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,4 @@ Environment variables are read during `SessionConfig` initialisation so they mus
113113
| datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type |
114114
| datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) |
115115
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |
116+
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |

0 commit comments

Comments
 (0)