Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/queries/clickbench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ WHERE
THEN split_part(split_part("URL", 'resolution=', 2), '&', 1)::INT
ELSE 0
END > 1920 -- Extract and validate resolution parameter
AND levenshtein("UTMSource", "UTMCampaign") < 3 -- Verify UTM parameter similarity
AND levenshtein(CAST("UTMSource" AS STRING), CAST("UTMCampaign" AS STRING)) < 3 -- Verify UTM parameter similarity
```
Result is empty, since it has already been filtered by `"SocialAction" = 'share'`.

Expand Down
2 changes: 1 addition & 1 deletion benchmarks/queries/clickbench/extended.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitCo
SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") FROM hits GROUP BY "SocialSourceNetworkID", "RegionID" HAVING s IS NOT NULL ORDER BY s DESC LIMIT 10;
SELECT "ClientIP", "WatchID", COUNT(*) c, MIN("ResponseStartTiming") tmin, MEDIAN("ResponseStartTiming") tmed, MAX("ResponseStartTiming") tmax FROM hits WHERE "JavaEnable" = 0 GROUP BY "ClientIP", "WatchID" HAVING c > 1 ORDER BY tmed DESC LIMIT 10;
SELECT "ClientIP", "WatchID", COUNT(*) c, MIN("ResponseStartTiming") tmin, APPROX_PERCENTILE_CONT("ResponseStartTiming", 0.95) tp95, MAX("ResponseStartTiming") tmax FROM 'hits' WHERE "JavaEnable" = 0 GROUP BY "ClientIP", "WatchID" HAVING c > 1 ORDER BY tp95 DESC LIMIT 10;
SELECT COUNT(*) AS ShareCount FROM hits WHERE "IsMobile" = 1 AND "MobilePhoneModel" LIKE 'iPhone%' AND "SocialAction" = 'share' AND "SocialSourceNetworkID" IN (5, 12) AND "ClientTimeZone" BETWEEN -5 AND 5 AND regexp_match("Referer", '\/campaign\/(spring|summer)_promo') IS NOT NULL AND CASE WHEN split_part(split_part("URL", 'resolution=', 2), '&', 1) ~ '^\d+$' THEN split_part(split_part("URL", 'resolution=', 2), '&', 1)::INT ELSE 0 END > 1920 AND levenshtein("UTMSource", "UTMCampaign") < 3;
SELECT COUNT(*) AS ShareCount FROM hits WHERE "IsMobile" = 1 AND "MobilePhoneModel" LIKE 'iPhone%' AND "SocialAction" = 'share' AND "SocialSourceNetworkID" IN (5, 12) AND "ClientTimeZone" BETWEEN -5 AND 5 AND regexp_match("Referer", '\/campaign\/(spring|summer)_promo') IS NOT NULL AND CASE WHEN split_part(split_part("URL", 'resolution=', 2), '&', 1) ~ '^\d+$' THEN split_part(split_part("URL", 'resolution=', 2), '&', 1)::INT ELSE 0 END > 1920 AND levenshtein(CAST("UTMSource" AS STRING), CAST("UTMCampaign" AS STRING)) < 3;
40 changes: 30 additions & 10 deletions datafusion/sql/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
//! This parser implements DataFusion specific statements such as
//! `CREATE EXTERNAL TABLE`

use datafusion_common::config::SqlParserOptions;
use datafusion_common::DataFusionError;
use datafusion_common::{sql_err, Diagnostic, Span};
use sqlparser::ast::{ExprWithAlias, OrderByOptions};
Expand Down Expand Up @@ -284,6 +285,7 @@ fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionErro
/// [`Statement`] for a list of this special syntax
pub struct DFParser<'a> {
/// Wrapped parser that statement and expression parsing is delegated to
/// (e.g. via `parse_statement` and `parse_expr_with_alias`).
pub parser: Parser<'a>,
/// Parser options kept alongside the parser. The builder stores its
/// recursion limit here (all other fields are left at their defaults) so
/// that a recursion-limit error can report the limit that was in effect.
options: SqlParserOptions,
}

/// Same as `sqlparser`
Expand Down Expand Up @@ -366,6 +368,10 @@ impl<'a> DFParserBuilder<'a> {
parser: Parser::new(self.dialect)
.with_tokens_with_locations(tokens)
.with_recursion_limit(self.recursion_limit),
options: SqlParserOptions {
recursion_limit: self.recursion_limit,
..Default::default()
},
})
}
}
Expand Down Expand Up @@ -471,9 +477,7 @@ impl<'a> DFParser<'a> {
if let Token::Word(w) = self.parser.peek_nth_token(1).token {
// use native parser for COPY INTO
if w.keyword == Keyword::INTO {
return Ok(Statement::Statement(Box::from(
self.parser.parse_statement()?,
)));
return self.parse_and_handle_statement();
}
}
self.parser.next_token(); // COPY
Expand All @@ -485,17 +489,13 @@ impl<'a> DFParser<'a> {
}
_ => {
// use sqlparser-rs parser
Ok(Statement::Statement(Box::from(
self.parser.parse_statement()?,
)))
self.parse_and_handle_statement()
}
}
}
_ => {
// use the native parser
Ok(Statement::Statement(Box::from(
self.parser.parse_statement()?,
)))
self.parse_and_handle_statement()
}
}
}
Expand All @@ -513,6 +513,23 @@ impl<'a> DFParser<'a> {
Ok(self.parser.parse_expr_with_alias()?)
}

/// Parses one statement with the wrapped parser and converts the outcome
/// into DataFusion's own types.
///
/// On success the parsed statement is boxed and wrapped in
/// [`Statement::Statement`].
///
/// # Errors
///
/// Every parser error is returned as [`DataFusionError::SQL`]. For
/// `ParserError::RecursionLimitExceeded` the error additionally carries the
/// configured recursion limit as a message suffix, so the user can see which
/// limit was hit; all other parser errors are passed through without a suffix.
fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> {
    match self.parser.parse_statement() {
        Ok(parsed) => Ok(Statement::Statement(Box::from(parsed))),
        // Attach the active limit so the message is actionable.
        Err(ParserError::RecursionLimitExceeded) => {
            let limit_note =
                format!(" (current limit: {})", self.options.recursion_limit);
            Err(DataFusionError::SQL(
                ParserError::RecursionLimitExceeded,
                Some(limit_note),
            ))
        }
        Err(other) => Err(DataFusionError::SQL(other, None)),
    }
}

/// Parse a SQL `COPY TO` statement
pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> {
// parse as a query
Expand Down Expand Up @@ -1760,6 +1777,9 @@ mod tests {
.parse_statements()
.unwrap_err();

assert_contains!(err.to_string(), "SQL error: RecursionLimitExceeded");
assert_contains!(
err.to_string(),
"SQL error: RecursionLimitExceeded (current limit: 1)"
);
}
}
13 changes: 7 additions & 6 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -4998,20 +4998,21 @@ create table d as values
(NULL, NULL, NULL, NULL, 1),
(NULL, NULL, NULL, NULL, 2);

query I?

query I? rowsort
SELECT column5, avg(column1) FROM d GROUP BY column5;
----
2 0 days 0 hours 0 mins 15 secs
1 0 days 0 hours 0 mins 6 secs
2 0 days 0 hours 0 mins 15 secs

query I??
SELECT column5, column1, avg(column1) OVER (PARTITION BY column5 ORDER BY column1 ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) as window_avg
query I?? rowsort
SELECT column5, column1, avg(column1) OVER (PARTITION BY column5 ORDER BY column1 ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) as window_avg
FROM d WHERE column1 IS NOT NULL;
----
2 0 days 0 hours 0 mins 5 secs 0 days 0 hours 0 mins 5 secs
2 0 days 0 hours 0 mins 25 secs 0 days 0 hours 0 mins 15 secs
1 0 days 0 hours 0 mins 1 secs 0 days 0 hours 0 mins 1 secs
1 0 days 0 hours 0 mins 11 secs 0 days 0 hours 0 mins 6 secs
2 0 days 0 hours 0 mins 25 secs 0 days 0 hours 0 mins 15 secs
2 0 days 0 hours 0 mins 5 secs 0 days 0 hours 0 mins 5 secs

# Cumulative average window function
query I??
Expand Down
Loading