Skip to content

Commit

Permalink
feat: support sample for table scan
Browse files Browse the repository at this point in the history
  • Loading branch information
xudong963 committed Aug 8, 2024
1 parent 125b431 commit 05abb61
Show file tree
Hide file tree
Showing 19 changed files with 239 additions and 11 deletions.
17 changes: 17 additions & 0 deletions src/query/ast/src/ast/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,23 @@ pub enum Literal {
Null,
}

impl Literal {
    /// Converts a numeric literal into an `f64`.
    ///
    /// * `UInt64` is cast to `f64`.
    /// * `Float64` is returned unchanged.
    /// * `Decimal256` is converted to `f64` and divided by `10^scale`.
    ///
    /// # Errors
    ///
    /// Returns a `ParseError` (with no span) for every other literal variant.
    pub fn as_double(&self) -> Result<f64> {
        let double = match self {
            Literal::UInt64(int) => *int as f64,
            Literal::Float64(float) => *float,
            Literal::Decimal256 { value, scale, .. } => {
                // Undo the decimal scaling: the stored value is `real * 10^scale`.
                let divisor = 10_f64.powi(*scale as i32);
                value.as_f64() / divisor
            }
            other => {
                return Err(ParseError(
                    None,
                    format!("Cannot convert {:?} to double", other),
                ));
            }
        };
        Ok(double)
    }
}

impl Display for Literal {
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
match self {
Expand Down
6 changes: 6 additions & 0 deletions src/query/ast/src/ast/format/ast_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3327,6 +3327,7 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor {
consume,
pivot,
unpivot,
sample,
} => {
let mut name = String::new();
name.push_str("TableIdentifier ");
Expand Down Expand Up @@ -3354,6 +3355,11 @@ impl<'ast> Visitor<'ast> for AstFormatVisitor {
name.push_str(&unpivot.to_string());
}

if let Some(sample) = sample {
name.push(' ');
name.push_str(&sample.to_string());
}

let mut children = Vec::new();

if let Some(temporal) = temporal {
Expand Down
6 changes: 6 additions & 0 deletions src/query/ast/src/ast/format/syntax/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ pub(crate) fn pretty_table(table: TableReference) -> RcDoc<'static> {
consume,
pivot,
unpivot,
sample,
} => if let Some(catalog) = catalog {
RcDoc::text(catalog.to_string()).append(RcDoc::text("."))
} else {
Expand Down Expand Up @@ -353,6 +354,11 @@ pub(crate) fn pretty_table(table: TableReference) -> RcDoc<'static> {
} else {
RcDoc::nil()
})
.append(if let Some(sample) = sample {
RcDoc::text(format!(" {sample}"))
} else {
RcDoc::nil()
})
.append(if let Some(alias) = alias {
RcDoc::text(format!(" AS {alias}"))
} else {
Expand Down
40 changes: 40 additions & 0 deletions src/query/ast/src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use derive_visitor::Drive;
use derive_visitor::DriveMut;

use super::Lambda;
use super::Literal;
use crate::ast::write_comma_separated_list;
use crate::ast::write_dot_separated_list;
use crate::ast::Expr;
Expand Down Expand Up @@ -608,6 +609,39 @@ impl Display for TemporalClause {
}
}

/// The level keyword of a `SAMPLE` clause on a table reference.
///
/// Rendered as `ROW` or `BLOCK` when a `Sample` is displayed. The parser
/// falls back to `ROW` when no level keyword is written.
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
pub enum SampleLevel {
// Displayed as `ROW`; presumably sampling is applied per row (confirm in the binder).
ROW,
// Displayed as `BLOCK`; presumably sampling is applied per storage block (confirm in the binder).
BLOCK,
}

/// The parenthesized argument of a `SAMPLE` clause.
///
/// The parser picks `RowsNum` when the `ROWS` keyword follows the literal and
/// `Probability` otherwise.
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
pub enum SampleConfig {
// Displayed as `(<literal>)`.
// NOTE(review): presumably a percentage, since the parser default is `100.0`; confirm.
Probability(Literal),
// Displayed as `(<literal> ROWS)`.
RowsNum(Literal),
}

/// A `SAMPLE` clause attached to a table reference.
///
/// Displayed as `SAMPLE <level> (<config>)`, e.g. `SAMPLE ROW (10 ROWS)`.
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
pub struct Sample {
// The level keyword (`ROW` or `BLOCK`).
pub sample_level: SampleLevel,
// The parenthesized argument: a probability literal or a row count.
pub sample_conf: SampleConfig,
}

impl Display for Sample {
    /// Renders the clause as `SAMPLE <ROW|BLOCK> (<literal>)` for a probability
    /// or `SAMPLE <ROW|BLOCK> (<literal> ROWS)` for a row count.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let level_keyword = match &self.sample_level {
            SampleLevel::ROW => "ROW",
            SampleLevel::BLOCK => "BLOCK",
        };
        write!(f, "SAMPLE {level_keyword} ")?;

        match &self.sample_conf {
            SampleConfig::Probability(probability) => write!(f, "({probability})"),
            SampleConfig::RowsNum(row_count) => write!(f, "({row_count} ROWS)"),
        }
    }
}

/// A table name or a parenthesized subquery with an optional alias
#[derive(Debug, Clone, PartialEq, Drive, DriveMut)]
pub enum TableReference {
Expand All @@ -623,6 +657,7 @@ pub enum TableReference {
consume: bool,
pivot: Option<Box<Pivot>>,
unpivot: Option<Box<Unpivot>>,
sample: Option<Sample>,
},
// `TABLE(expr)[ AS alias ]`
TableFunction {
Expand Down Expand Up @@ -697,6 +732,7 @@ impl Display for TableReference {
consume,
pivot,
unpivot,
sample,
} => {
write_dot_separated_list(
f,
Expand All @@ -721,6 +757,10 @@ impl Display for TableReference {
if let Some(unpivot) = unpivot {
write!(f, " {unpivot}")?;
}

if let Some(sample) = sample {
write!(f, " {sample}")?;
}
}
TableReference::TableFunction {
span: _,
Expand Down
1 change: 1 addition & 0 deletions src/query/ast/src/ast/statements/merge_into.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ impl MergeSource {
consume: false,
pivot: None,
unpivot: None,
sample: None,
},
}
}
Expand Down
47 changes: 45 additions & 2 deletions src/query/ast/src/parser/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ pub enum TableReferenceElement {
consume: bool,
pivot: Option<Box<Pivot>>,
unpivot: Option<Box<Unpivot>>,
sample: Option<Sample>,
},
// `TABLE(expr)[ AS alias ]`
TableFunction {
Expand Down Expand Up @@ -741,9 +742,48 @@ pub fn table_reference_element(i: Input) -> IResult<WithSpan<TableReferenceEleme
);
let aliased_table = map(
rule! {
#dot_separated_idents_1_to_3 ~ #temporal_clause? ~ (WITH ~ CONSUME)? ~ #table_alias? ~ #pivot? ~ #unpivot?
#dot_separated_idents_1_to_3 ~ #temporal_clause? ~ (WITH ~ CONSUME)? ~ #table_alias? ~ #pivot? ~ #unpivot? ~ SAMPLE? ~ (ROW | BLOCK)? ~ ("(" ~ #expr ~ ROWS? ~ ")")?
},
|((catalog, database, table), temporal, opt_consume, alias, pivot, unpivot)| {
|(
(catalog, database, table),
temporal,
opt_consume,
alias,
pivot,
unpivot,
sample,
level,
sample_conf,
)| {
// Build the optional SAMPLE clause. Nothing is produced unless the SAMPLE
// keyword itself was present; a level or parenthesized argument parsed
// without it is discarded here.
let table_sample = sample.map(|_| {
    // If the sample level is not specified, it defaults to ROW.
    let sample_level = match level {
        Some(level) => match level.kind {
            ROW => SampleLevel::ROW,
            BLOCK => SampleLevel::BLOCK,
            // The grammar only admits ROW or BLOCK in this position.
            _ => unreachable!(),
        },
        None => SampleLevel::ROW,
    };
    let sample_conf = match sample_conf {
        // A literal followed by ROWS is a row count; a bare literal is a probability.
        Some((_, Expr::Literal { value, .. }, rows, _)) => {
            if rows.is_some() {
                SampleConfig::RowsNum(value)
            } else {
                SampleConfig::Probability(value)
            }
        }
        // No argument was given: fall back to a probability of 100.0.
        // NOTE(review): a non-literal expression also lands here and is silently
        // ignored; consider rejecting it with a parse error.
        _ => SampleConfig::Probability(Literal::Float64(100.0)),
    };
    Sample {
        sample_level,
        sample_conf,
    }
});
TableReferenceElement::Table {
catalog,
database,
Expand All @@ -753,6 +793,7 @@ pub fn table_reference_element(i: Input) -> IResult<WithSpan<TableReferenceEleme
consume: opt_consume.is_some(),
pivot: pivot.map(Box::new),
unpivot: unpivot.map(Box::new),
sample: table_sample,
}
},
);
Expand Down Expand Up @@ -864,6 +905,7 @@ impl<'a, I: Iterator<Item = WithSpan<'a, TableReferenceElement>>> PrattParser<I>
consume,
pivot,
unpivot,
sample,
} => TableReference::Table {
span: transform_span(input.span.tokens),
catalog,
Expand All @@ -874,6 +916,7 @@ impl<'a, I: Iterator<Item = WithSpan<'a, TableReferenceElement>>> PrattParser<I>
consume,
pivot,
unpivot,
sample,
},
TableReferenceElement::TableFunction {
lateral,
Expand Down
2 changes: 2 additions & 0 deletions src/query/ast/src/parser/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4140,6 +4140,7 @@ pub fn table_reference_with_alias(i: Input) -> IResult<TableReference> {
consume: false,
pivot: None,
unpivot: None,
sample: None,
},
)(i)
}
Expand All @@ -4159,6 +4160,7 @@ pub fn table_reference_only(i: Input) -> IResult<TableReference> {
consume: false,
pivot: None,
unpivot: None,
sample: None,
},
)(i)
}
Expand Down
9 changes: 8 additions & 1 deletion src/query/ast/src/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,8 @@ pub enum TokenKind {
BROTLI,
#[token("BZ2", ignore(ascii_case))]
BZ2,
#[token("BLOCK", ignore(ascii_case))]
BLOCK,
#[token("CALL", ignore(ascii_case))]
CALL,
#[token("CASE", ignore(ascii_case))]
Expand Down Expand Up @@ -924,6 +926,8 @@ pub enum TokenKind {
RETURN_FAILED_ONLY,
#[token("REVERSE", ignore(ascii_case))]
REVERSE,
#[token("SAMPLE", ignore(ascii_case))]
SAMPLE,
#[token("MERGE", ignore(ascii_case))]
MERGE,
#[token("MATCHED", ignore(ascii_case))]
Expand Down Expand Up @@ -1567,6 +1571,7 @@ impl TokenKind {
// | TokenKind::AUTHORIZATION
// | TokenKind::BINARY
| TokenKind::BOTH
| TokenKind::BLOCK
| TokenKind::CASE
| TokenKind::CAST
// | TokenKind::CHECK
Expand Down Expand Up @@ -1620,10 +1625,13 @@ impl TokenKind {
| TokenKind::SELECT
| TokenKind::PIVOT
| TokenKind::UNPIVOT
| TokenKind::ROW
| TokenKind::ROWS
// | TokenKind::SESSION_USER
// | TokenKind::SIMILAR
| TokenKind::SOME
| TokenKind::SEMI
| TokenKind::SAMPLE
// | TokenKind::SYMMETRIC
// | TokenKind::TABLESAMPLE
| TokenKind::THEN
Expand Down Expand Up @@ -1660,7 +1668,6 @@ impl TokenKind {
| TokenKind::OVER
| TokenKind::PARTITION
| TokenKind::QUALIFY
| TokenKind::ROWS
| TokenKind::RANGE
// | TokenKind::OVERLAPS
// | TokenKind::RETURNING
Expand Down
1 change: 1 addition & 0 deletions src/query/sql/src/planner/binder/bind_mutation/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ impl Binder {
consume: false,
pivot: None,
unpivot: None,
sample: None,
};
let source_reference = stmt.source.transform_table_reference();

Expand Down
2 changes: 2 additions & 0 deletions src/query/sql/src/planner/binder/bind_table_reference/bind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ impl Binder {
pivot: _,
unpivot: _,
consume,
sample,
} => self.bind_table(
bind_context,
span,
Expand All @@ -45,6 +46,7 @@ impl Binder {
alias,
temporal,
*consume,
sample,
),
TableReference::TableFunction {
span,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

use databend_common_ast::ast::Identifier;
use databend_common_ast::ast::Sample;
use databend_common_ast::ast::Statement;
use databend_common_ast::ast::TableAlias;
use databend_common_ast::ast::TemporalClause;
Expand Down Expand Up @@ -44,6 +45,7 @@ impl Binder {
alias: &Option<TableAlias>,
temporal: &Option<TemporalClause>,
consume: bool,
sample: &Option<Sample>,
) -> Result<(SExpr, BindContext)> {
let table_identifier = TableIdentifier::new(self, catalog, database, table, alias);
let (catalog, database, table_name, table_name_alias) = (
Expand Down Expand Up @@ -142,6 +144,7 @@ impl Binder {
database.as_str(),
table_index,
change_type,
sample,
)?;

if let Some(alias) = alias {
Expand Down Expand Up @@ -247,8 +250,13 @@ impl Binder {
false,
);

let (s_expr, mut bind_context) =
self.bind_base_table(bind_context, database.as_str(), table_index, None)?;
let (s_expr, mut bind_context) = self.bind_base_table(
bind_context,
database.as_str(),
table_index,
None,
sample,
)?;
if let Some(alias) = alias {
bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ impl Binder {
);

let (s_expr, mut bind_context) =
self.bind_base_table(bind_context, "system", table_index, None)?;
self.bind_base_table(bind_context, "system", table_index, None, &None)?;
if let Some(alias) = alias {
bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?;
}
Expand Down Expand Up @@ -220,7 +220,7 @@ impl Binder {
);

let (s_expr, mut bind_context) =
self.bind_base_table(bind_context, "system", table_index, None)?;
self.bind_base_table(bind_context, "system", table_index, None, &None)?;
if let Some(alias) = alias {
bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?;
}
Expand Down
Loading

0 comments on commit 05abb61

Please sign in to comment.