
Commit 5a2f060

analyze table noscan
1 parent cae80b0 · commit 5a2f060

File tree

13 files changed: +341 -115 lines changed
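
This commit adds a `NOSCAN` option to `ANALYZE TABLE`. Without it, the statement keeps its existing behavior and runs a full analyze that scans the table to compute per-column statistics; with it, the interpreter takes a new lightweight path (`AnalyzeLightMutator`) that collects and commits statistics without scanning table data. A usage sketch, with an illustrative table name:

```sql
-- Existing behavior: full analyze, which scans the table
ANALYZE TABLE t;

-- New in this commit: skip the scan and collect statistics cheaply
ANALYZE TABLE t NOSCAN;
```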

src/query/ast/src/ast/statements/table.rs

Lines changed: 4 additions & 0 deletions

```diff
@@ -724,6 +724,7 @@ pub struct AnalyzeTableStmt {
     pub catalog: Option<Identifier>,
     pub database: Option<Identifier>,
     pub table: Identifier,
+    pub full: bool,
 }

 impl Display for AnalyzeTableStmt {
@@ -736,6 +737,9 @@ impl Display for AnalyzeTableStmt {
                 .chain(&self.database)
                 .chain(Some(&self.table)),
         )?;
+        if self.full {
+            write!(f, " FULL")?;
+        }

         Ok(())
     }
```
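
With the new `full` flag on `AnalyzeTableStmt`, the `Display` impl appends ` FULL` when the flag is set. An illustrative rendering (note that the parser change below accepts `NOSCAN`, not `FULL`, so this suffix appears only in the displayed form):

```sql
-- AnalyzeTableStmt { full: true, .. } renders as:
ANALYZE TABLE db.t FULL
-- AnalyzeTableStmt { full: false, .. } renders as:
ANALYZE TABLE db.t
```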

src/query/ast/src/parser/statement.rs

Lines changed: 3 additions & 2 deletions

```diff
@@ -1168,13 +1168,14 @@ pub fn statement_body(i: Input) -> IResult<Statement> {
     );
     let analyze_table = map(
         rule! {
-            ANALYZE ~ TABLE ~ #dot_separated_idents_1_to_3
+            ANALYZE ~ TABLE ~ #dot_separated_idents_1_to_3 ~ NOSCAN?
         },
-        |(_, _, (catalog, database, table))| {
+        |(_, _, (catalog, database, table), no_scan)| {
             Statement::AnalyzeTable(AnalyzeTableStmt {
                 catalog,
                 database,
                 table,
+                full: no_scan.is_none(),
             })
         },
     );
```
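
Since `#dot_separated_idents_1_to_3` accepts one to three dot-separated identifiers and `NOSCAN?` is optional, all of the following should parse; the optional token maps to `full: no_scan.is_none()` (names illustrative):

```sql
ANALYZE TABLE t NOSCAN;        -- full = false
ANALYZE TABLE db.t NOSCAN;     -- full = false
ANALYZE TABLE ctl.db.t;        -- full = true
```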

src/query/ast/src/parser/token.rs

Lines changed: 2 additions & 0 deletions

```diff
@@ -866,6 +866,8 @@ pub enum TokenKind {
     NO_PASSWORD,
     #[token("NONE", ignore(ascii_case))]
     NONE,
+    #[token("NOSCAN", ignore(ascii_case))]
+    NOSCAN,
     #[token("NOT", ignore(ascii_case))]
     NOT,
     #[token("NOTENANTSETTING", ignore(ascii_case))]
```

src/query/service/src/interpreters/interpreter_table_analyze.rs

Lines changed: 105 additions & 102 deletions

```diff
@@ -35,6 +35,7 @@ use databend_common_sql::Planner;
 use databend_common_storage::DEFAULT_HISTOGRAM_BUCKETS;
 use databend_common_storages_factory::NavigationPoint;
 use databend_common_storages_factory::Table;
+use databend_common_storages_fuse::operations::AnalyzeLightMutator;
 use databend_common_storages_fuse::operations::HistogramInfoSink;
 use databend_common_storages_fuse::FuseTable;
 use databend_storages_common_index::Index;
@@ -109,66 +110,71 @@ impl Interpreter for AnalyzeTableInterpreter {
             Err(_) => return Ok(PipelineBuildResult::create()),
         };

-        let r = table.read_table_snapshot().await;
-        let snapshot_opt = match r {
-            Err(e) => return Err(e),
-            Ok(v) => v,
+        let Some(snapshot) = table.read_table_snapshot().await? else {
+            return Ok(PipelineBuildResult::create());
         };

-        if let Some(snapshot) = snapshot_opt {
-            // plan sql
-            let _table_info = table.get_table_info();
+        if !self.plan.full {
+            let operator = table.get_operator();
+            let cluster_key_id = table.cluster_key_id();
+            let mut mutator =
+                AnalyzeLightMutator::create(self.ctx.clone(), operator, snapshot, cluster_key_id);
+            mutator.target_select().await?;
+            mutator.try_commit(table).await?;
+            return Ok(PipelineBuildResult::create());
+        }

-            let table_statistics = table
-                .read_table_snapshot_statistics(Some(&snapshot))
-                .await?;
+        // plan sql
+        let _table_info = table.get_table_info();

-            let (is_full, temporal_str) = if let Some(table_statistics) = &table_statistics {
-                let is_full = match table
-                    .navigate_to_point(
-                        &NavigationPoint::SnapshotID(
-                            table_statistics.snapshot_id.simple().to_string(),
-                        ),
-                        self.ctx.clone().get_abort_checker(),
-                    )
-                    .await
-                {
-                    Ok(t) => !t
-                        .read_table_snapshot()
-                        .await
-                        .is_ok_and(|s| s.is_some_and(|s| s.prev_table_seq.is_some())),
-                    Err(_) => true,
-                };
+        let table_statistics = table
+            .read_table_snapshot_statistics(Some(&snapshot))
+            .await?;
+
+        let (is_full, temporal_str) = if let Some(table_statistics) = &table_statistics {
+            let is_full = match table
+                .navigate_to_point(
+                    &NavigationPoint::SnapshotID(table_statistics.snapshot_id.simple().to_string()),
+                    self.ctx.clone().get_abort_checker(),
+                )
+                .await
+            {
+                Ok(t) => !t
+                    .read_table_snapshot()
+                    .await
+                    .is_ok_and(|s| s.is_some_and(|s| s.prev_table_seq.is_some())),
+                Err(_) => true,
+            };

-                let temporal_str = if is_full {
-                    format!("AT (snapshot => '{}')", snapshot.snapshot_id.simple())
-                } else {
-                    // analyze only need to collect the added blocks.
-                    let table_alias = format!("_change_insert${:08x}", Utc::now().timestamp());
-                    format!(
-                        "CHANGES(INFORMATION => DEFAULT) AT (snapshot => '{}') END (snapshot => '{}') AS {table_alias}",
-                        table_statistics.snapshot_id.simple(),
-                        snapshot.snapshot_id.simple(),
-                    )
-                };
-                (is_full, temporal_str)
-            } else {
-                (
-                    true,
-                    format!("AT (snapshot => '{}')", snapshot.snapshot_id.simple()),
-                )
-            };
+            let temporal_str = if is_full {
+                format!("AT (snapshot => '{}')", snapshot.snapshot_id.simple())
+            } else {
+                // analyze only need to collect the added blocks.
+                let table_alias = format!("_change_insert${:08x}", Utc::now().timestamp());
+                format!(
+                    "CHANGES(INFORMATION => DEFAULT) AT (snapshot => '{}') END (snapshot => '{}') AS {table_alias}",
+                    table_statistics.snapshot_id.simple(),
+                    snapshot.snapshot_id.simple(),
+                )
+            };
+            (is_full, temporal_str)
+        } else {
+            (
+                true,
+                format!("AT (snapshot => '{}')", snapshot.snapshot_id.simple()),
+            )
+        };

-            let quote = self
-                .ctx
-                .get_settings()
-                .get_sql_dialect()?
-                .default_ident_quote();
+        let quote = self
+            .ctx
+            .get_settings()
+            .get_sql_dialect()?
+            .default_ident_quote();

-            // 0.01625 --> 12 buckets --> 4K size per column
-            // 1.04 / math.sqrt(1<<12) --> 0.01625
-            const DISTINCT_ERROR_RATE: f64 = 0.01625;
-            let ndv_select_expr = snapshot
+        // 0.01625 --> 12 buckets --> 4K size per column
+        // 1.04 / math.sqrt(1<<12) --> 0.01625
+        const DISTINCT_ERROR_RATE: f64 = 0.01625;
+        let ndv_select_expr = snapshot
             .schema
             .fields()
             .iter()
@@ -182,22 +188,22 @@ impl Interpreter for AnalyzeTableInterpreter {
             })
             .join(", ");

-            let sql = format!(
-                "SELECT {ndv_select_expr}, {is_full} as is_full from {}.{} {temporal_str}",
-                plan.database, plan.table,
-            );
+        let sql = format!(
+            "SELECT {ndv_select_expr}, {is_full} as is_full from {}.{} {temporal_str}",
+            plan.database, plan.table,
+        );

-            info!("Analyze via sql: {sql}");
+        info!("Analyze via sql: {sql}");

-            let (physical_plan, bind_context) = self.plan_sql(sql).await?;
-            let mut build_res =
-                build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?;
-            // After profiling, computing histogram is heavy and the bottleneck is window function(90%).
-            // It's possible to OOM if the table is too large and spilling isn't enabled.
-            // We add a setting `enable_analyze_histogram` to control whether to compute histogram(default is closed).
-            let mut histogram_info_receivers = HashMap::new();
-            if self.ctx.get_settings().get_enable_analyze_histogram()? {
-                let histogram_sqls = table
+        let (physical_plan, bind_context) = self.plan_sql(sql).await?;
+        let mut build_res =
+            build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?;
+        // After profiling, computing histogram is heavy and the bottleneck is window function(90%).
+        // It's possible to OOM if the table is too large and spilling isn't enabled.
+        // We add a setting `enable_analyze_histogram` to control whether to compute histogram(default is closed).
+        let mut histogram_info_receivers = HashMap::new();
+        if self.ctx.get_settings().get_enable_analyze_histogram()? {
+            let histogram_sqls = table
                 .schema()
                 .fields()
                 .iter()
@@ -222,50 +228,47 @@ impl Interpreter for AnalyzeTableInterpreter {
                 )
             })
             .collect::<Vec<_>>();
-                for (sql, col_id) in histogram_sqls.into_iter() {
-                    info!("Analyze histogram via sql: {sql}");
-                    let (mut histogram_plan, bind_context) = self.plan_sql(sql).await?;
-                    if !self.ctx.get_cluster().is_empty() {
-                        histogram_plan = remove_exchange(histogram_plan);
-                    }
-                    let mut histogram_build_res = build_query_pipeline(
-                        &QueryContext::create_from(self.ctx.as_ref()),
-                        &bind_context.columns,
-                        &histogram_plan,
-                        false,
-                    )
-                    .await?;
-                    let (tx, rx) = async_channel::unbounded();
-                    histogram_build_res.main_pipeline.add_sink(|input_port| {
-                        Ok(ProcessorPtr::create(HistogramInfoSink::create(
-                            Some(tx.clone()),
-                            input_port.clone(),
-                        )))
-                    })?;
-
-                    build_res
-                        .sources_pipelines
-                        .push(histogram_build_res.main_pipeline.finalize(None));
-                    build_res
-                        .sources_pipelines
-                        .extend(histogram_build_res.sources_pipelines);
-                    histogram_info_receivers.insert(col_id, rx);
+            for (sql, col_id) in histogram_sqls.into_iter() {
+                info!("Analyze histogram via sql: {sql}");
+                let (mut histogram_plan, bind_context) = self.plan_sql(sql).await?;
+                if !self.ctx.get_cluster().is_empty() {
+                    histogram_plan = remove_exchange(histogram_plan);
                 }
+                let mut histogram_build_res = build_query_pipeline(
+                    &QueryContext::create_from(self.ctx.as_ref()),
+                    &bind_context.columns,
+                    &histogram_plan,
+                    false,
+                )
+                .await?;
+                let (tx, rx) = async_channel::unbounded();
+                histogram_build_res.main_pipeline.add_sink(|input_port| {
+                    Ok(ProcessorPtr::create(HistogramInfoSink::create(
+                        Some(tx.clone()),
+                        input_port.clone(),
+                    )))
+                })?;
+
+                build_res
+                    .sources_pipelines
+                    .push(histogram_build_res.main_pipeline.finalize(None));
+                build_res
+                    .sources_pipelines
+                    .extend(histogram_build_res.sources_pipelines);
+                histogram_info_receivers.insert(col_id, rx);
             }
-            FuseTable::do_analyze(
-                self.ctx.clone(),
-                bind_context.output_schema(),
-                &self.plan.catalog,
-                &self.plan.database,
-                &self.plan.table,
-                snapshot.snapshot_id,
-                &mut build_res.main_pipeline,
-                histogram_info_receivers,
-            )?;
-            return Ok(build_res);
         }
-
-        return Ok(PipelineBuildResult::create());
+        FuseTable::do_analyze(
+            self.ctx.clone(),
+            bind_context.output_schema(),
+            &self.plan.catalog,
+            &self.plan.database,
+            &self.plan.table,
+            snapshot.snapshot_id,
+            &mut build_res.main_pipeline,
+            histogram_info_receivers,
+        )?;
+        Ok(build_res)
     }
 }
```
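
For the full path, the generated query's FROM clause is pinned by `temporal_str`: a plain `AT (snapshot => ...)` when a full analyze is needed, or a `CHANGES ... AT ... END` range that reads only the blocks added since the last statistics snapshot. An illustrative sketch of the two forms with placeholder snapshot ids; the per-column select list comes from `ndv_select_expr`, which this hunk truncates, so it is elided here:

```sql
-- is_full == true: scan the table as of the current snapshot
SELECT ..., true as is_full FROM db.t AT (snapshot => '<current_id>');

-- is_full == false: scan only the rows added between the two snapshots
SELECT ..., false as is_full FROM db.t
  CHANGES(INFORMATION => DEFAULT)
  AT (snapshot => '<stats_id>') END (snapshot => '<current_id>') AS _change_insert$xxxxxxxx;
```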

src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs

Lines changed: 4 additions & 4 deletions

```diff
@@ -89,7 +89,7 @@ async fn test_compact_segment_normal_case() -> Result<()> {
     let mutator = build_mutator(fuse_table, ctx.clone(), None).await?;
     assert!(mutator.is_some());
     let mutator = mutator.unwrap();
-    mutator.try_commit(table.clone()).await?;
+    mutator.try_commit(fuse_table).await?;

     // check segment count
     let qry = "select segment_count as count from fuse_snapshot('default', 't') limit 1";
@@ -134,7 +134,7 @@ async fn test_compact_segment_resolvable_conflict() -> Result<()> {
     let num_inserts = 9;
     fixture.append_rows(num_inserts).await?;

-    mutator.try_commit(table.clone()).await?;
+    mutator.try_commit(fuse_table).await?;

     // check segment count
     let count_seg = "select segment_count as count from fuse_snapshot('default', 't') limit 1";
@@ -194,7 +194,7 @@ async fn test_compact_segment_unresolvable_conflict() -> Result<()> {
     }

     // the compact operation committed latter should be failed.
-    let r = mutator.try_commit(table.clone()).await;
+    let r = mutator.try_commit(fuse_table).await;
     assert!(r.is_err());
     assert_eq!(r.err().unwrap().code(), ErrorCode::UNRESOLVABLE_CONFLICT);

@@ -232,7 +232,7 @@ async fn check_count(result_stream: SendableDataBlockStream) -> Result<u64> {
 pub async fn compact_segment(ctx: Arc<QueryContext>, table: &Arc<dyn Table>) -> Result<()> {
     let fuse_table = FuseTable::try_from_table(table.as_ref())?;
     let mutator = build_mutator(fuse_table, ctx.clone(), None).await?.unwrap();
-    mutator.try_commit(table.clone()).await
+    mutator.try_commit(fuse_table).await
 }

 async fn build_mutator(
```

src/query/sql/src/planner/binder/ddl/table.rs

Lines changed: 2 additions & 0 deletions

```diff
@@ -1385,6 +1385,7 @@ impl Binder {
             catalog,
             database,
             table,
+            full,
         } = stmt;

         let (catalog, database, table) =
@@ -1394,6 +1395,7 @@ impl Binder {
             catalog,
             database,
             table,
+            full: *full,
         })))
     }
```

src/query/sql/src/planner/plans/ddl/table.rs

Lines changed: 1 addition & 0 deletions

```diff
@@ -205,6 +205,7 @@ pub struct AnalyzeTablePlan {
     pub catalog: String,
     pub database: String,
     pub table: String,
+    pub full: bool,
 }

 impl AnalyzeTablePlan {
```

src/query/storages/fuse/src/operations/commit.rs

Lines changed: 5 additions & 0 deletions

```diff
@@ -342,6 +342,11 @@ impl FuseTable {
         )?;

         let schema = self.schema();
+        if schema != latest_table_info.schema() {
+            return Err(ErrorCode::StorageOther(
+                "The schema of the table has changed",
+            ));
+        }
         let (segments_tobe_committed, statistics_tobe_committed) = Self::merge_with_base(
             ctx.clone(),
             self.operator.clone(),
```
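
The new check guards against a schema change racing with a statistics commit: if the table's schema at commit time differs from the one the mutator was built against, the commit is rejected rather than writing statistics for a stale schema. A hypothetical interleaving that would now fail (session labels and column name are illustrative):

```sql
-- session A
ANALYZE TABLE t NOSCAN;           -- reads the snapshot, prepares a statistics commit
-- session B, before A commits
ALTER TABLE t ADD COLUMN c INT;
-- session A's commit now errors with "The schema of the table has changed"
```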

src/query/storages/fuse/src/operations/compact.rs

Lines changed: 1 addition & 1 deletion

```diff
@@ -66,7 +66,7 @@ impl FuseTable {
             return Ok(());
         }

-        segment_mutator.try_commit(Arc::new(self.clone())).await
+        segment_mutator.try_commit(self).await
     }

     #[async_backtrace::framed]
```
