|
17 | 17 | //! .await?;
|
18 | 18 | //! ````
|
19 | 19 |
|
20 |
| -use core::panic; |
21 |
| -use std::sync::Arc; |
22 |
| -use std::time::{Instant, SystemTime, UNIX_EPOCH}; |
23 |
| - |
24 |
| -use super::writer::DeltaWriter; |
25 | 20 | use crate::logstore::LogStoreRef;
|
| 21 | +use core::panic; |
26 | 22 | use datafusion::execution::context::{SessionContext, SessionState};
|
27 | 23 | use datafusion::physical_plan::filter::FilterExec;
|
| 24 | +use datafusion::physical_plan::projection::ProjectionExec; |
28 | 25 | use datafusion::physical_plan::ExecutionPlan;
|
29 | 26 | use datafusion::prelude::Expr;
|
30 | 27 | use datafusion_common::scalar::ScalarValue;
|
31 | 28 | use datafusion_common::DFSchema;
|
| 29 | +use datafusion_expr::lit; |
| 30 | +use datafusion_physical_expr::{ |
| 31 | + expressions::{self}, |
| 32 | + PhysicalExpr, |
| 33 | +}; |
32 | 34 | use futures::future::BoxFuture;
|
33 |
| -use object_store::prefix::PrefixStore; |
| 35 | +use std::iter; |
| 36 | +use std::sync::Arc; |
| 37 | +use std::time::{Instant, SystemTime, UNIX_EPOCH}; |
| 38 | + |
34 | 39 | use parquet::file::properties::WriterProperties;
|
35 | 40 | use serde::Serialize;
|
36 | 41 |
|
37 | 42 | use super::cdc::should_write_cdc;
|
38 | 43 | use super::datafusion_utils::Expression;
|
39 | 44 | use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL};
|
40 |
| -use super::write::{write_execution_plan_cdc, write_execution_plan_cdf, WriterStatsConfig}; |
41 |
| -use super::writer::WriterConfig; |
| 45 | +use super::write::{write_execution_plan_cdc, WriterStatsConfig}; |
| 46 | + |
42 | 47 | use crate::delta_datafusion::expr::fmt_expr_to_sql;
|
43 | 48 | use crate::delta_datafusion::{
|
44 | 49 | find_files, register_store, DataFusionMixins, DeltaScanBuilder, DeltaSessionContext,
|
45 | 50 | };
|
46 | 51 | use crate::errors::DeltaResult;
|
47 |
| -use crate::kernel::{Action, Add, AddCDCFile, Remove}; |
| 52 | +use crate::kernel::{Action, Add, Remove}; |
48 | 53 | use crate::operations::write::write_execution_plan;
|
49 | 54 | use crate::protocol::DeltaOperation;
|
50 | 55 | use crate::table::state::DeltaTableState;
|
@@ -191,14 +196,40 @@ async fn excute_non_empty_expr(
|
191 | 196 | // CDC logic, simply filters data with predicate and adds the _change_type="delete" as literal column
|
192 | 197 | match should_write_cdc(&snapshot) {
|
193 | 198 | Ok(true) => {
|
| 199 | + // Create CDC scan |
194 | 200 | let cdc_predicate_expr =
|
195 | 201 | state.create_physical_expr(expression.clone(), &input_dfschema)?;
|
196 | 202 | let cdc_scan: Arc<dyn ExecutionPlan> =
|
197 | 203 | Arc::new(FilterExec::try_new(cdc_predicate_expr, scan.clone())?);
|
| 204 | + |
| 205 | + // Add literal column "_change_type" |
| 206 | + let change_type_lit = lit(ScalarValue::Utf8(Some("delete".to_string()))); |
| 207 | + let change_type_expr = state.create_physical_expr(change_type_lit, &input_dfschema)?; |
| 208 | + |
| 209 | + // Project columns and lit |
| 210 | + let project_expressions: Vec<(Arc<dyn PhysicalExpr>, String)> = scan |
| 211 | + .schema() |
| 212 | + .fields() |
| 213 | + .into_iter() |
| 214 | + .enumerate() |
| 215 | + .map(|(idx, field)| -> (Arc<dyn PhysicalExpr>, String) { |
| 216 | + ( |
| 217 | + Arc::new(expressions::Column::new(field.name(), idx)), |
| 218 | + field.name().to_owned(), |
| 219 | + ) |
| 220 | + }) |
| 221 | + .chain(iter::once((change_type_expr, "_change_type".to_owned()))) |
| 222 | + .collect(); |
| 223 | + |
| 224 | + let projected_scan: Arc<dyn ExecutionPlan> = Arc::new(ProjectionExec::try_new( |
| 225 | + project_expressions, |
| 226 | + cdc_scan.clone(), |
| 227 | + )?); |
| 228 | + |
198 | 229 | let cdc_actions = write_execution_plan_cdc(
|
199 | 230 | Some(snapshot),
|
200 | 231 | state.clone(),
|
201 |
| - cdc_scan.clone(), |
| 232 | + projected_scan.clone(), |
202 | 233 | table_partition_cols.clone(),
|
203 | 234 | log_store.object_store(),
|
204 | 235 | Some(snapshot.table_config().target_file_size() as usize),
|
|
0 commit comments