@@ -21,6 +21,7 @@ use core::panic;
21
21
use std:: sync:: Arc ;
22
22
use std:: time:: { Instant , SystemTime , UNIX_EPOCH } ;
23
23
24
+ use super :: writer:: DeltaWriter ;
24
25
use crate :: logstore:: LogStoreRef ;
25
26
use datafusion:: execution:: context:: { SessionContext , SessionState } ;
26
27
use datafusion:: physical_plan:: filter:: FilterExec ;
@@ -29,18 +30,21 @@ use datafusion::prelude::Expr;
29
30
use datafusion_common:: scalar:: ScalarValue ;
30
31
use datafusion_common:: DFSchema ;
31
32
use futures:: future:: BoxFuture ;
33
+ use object_store:: prefix:: PrefixStore ;
32
34
use parquet:: file:: properties:: WriterProperties ;
33
35
use serde:: Serialize ;
34
36
37
+ use super :: cdc:: should_write_cdc;
35
38
use super :: datafusion_utils:: Expression ;
36
39
use super :: transaction:: { CommitBuilder , CommitProperties , PROTOCOL } ;
37
- use super :: write:: WriterStatsConfig ;
40
+ use super :: write:: { write_execution_plan_cdc, write_execution_plan_cdf, WriterStatsConfig } ;
41
+ use super :: writer:: WriterConfig ;
38
42
use crate :: delta_datafusion:: expr:: fmt_expr_to_sql;
39
43
use crate :: delta_datafusion:: {
40
44
find_files, register_store, DataFusionMixins , DeltaScanBuilder , DeltaSessionContext ,
41
45
} ;
42
46
use crate :: errors:: DeltaResult ;
43
- use crate :: kernel:: { Action , Add , Remove } ;
47
+ use crate :: kernel:: { Action , Add , AddCDCFile , Remove } ;
44
48
use crate :: operations:: write:: write_execution_plan;
45
49
use crate :: protocol:: DeltaOperation ;
46
50
use crate :: table:: state:: DeltaTableState ;
@@ -130,7 +134,7 @@ async fn excute_non_empty_expr(
130
134
metrics : & mut DeleteMetrics ,
131
135
rewrite : & [ Add ] ,
132
136
writer_properties : Option < WriterProperties > ,
133
- ) -> DeltaResult < Vec < Add > > {
137
+ ) -> DeltaResult < Vec < Action > > {
134
138
// For each identified file perform a parquet scan + filter + limit (1) + count.
135
139
// If returned count is not zero then append the file to be rewritten and removed from the log. Otherwise do nothing to the file.
136
140
@@ -160,27 +164,53 @@ async fn excute_non_empty_expr(
160
164
. map ( |v| v. iter ( ) . map ( |v| v. to_string ( ) ) . collect :: < Vec < String > > ( ) ) ,
161
165
) ;
162
166
163
- let add_actions = write_execution_plan (
167
+ let mut actions : Vec < Action > = write_execution_plan (
164
168
Some ( snapshot) ,
165
169
state. clone ( ) ,
166
170
filter. clone ( ) ,
167
171
table_partition_cols. clone ( ) ,
168
172
log_store. object_store ( ) ,
169
173
Some ( snapshot. table_config ( ) . target_file_size ( ) as usize ) ,
170
174
None ,
171
- writer_properties,
175
+ writer_properties. clone ( ) ,
172
176
false ,
173
177
None ,
174
- writer_stats_config,
178
+ writer_stats_config. clone ( ) ,
175
179
None ,
176
180
)
177
181
. await ?
178
182
. into_iter ( )
179
183
. map ( |a| match a {
180
184
Action :: Add ( a) => a,
181
185
_ => panic ! ( "Expected Add action" ) ,
182
- } )
183
- . collect :: < Vec < Add > > ( ) ;
186
+ } ) . into_iter ( ) . map ( Action :: Add ) . collect ( ) ;
187
+
188
+ // CDC logic, simply filters data with predicate and adds the _change_type="delete" as literal column
189
+ match should_write_cdc ( & snapshot) {
190
+ Ok ( true ) => {
191
+ let cdc_predicate_expr =
192
+ state. create_physical_expr ( expression. clone ( ) , & input_dfschema) ?;
193
+ let cdc_scan: Arc < dyn ExecutionPlan > =
194
+ Arc :: new ( FilterExec :: try_new ( cdc_predicate_expr, scan. clone ( ) ) ?) ;
195
+ let cdc_actions = write_execution_plan_cdc (
196
+ Some ( snapshot) ,
197
+ state. clone ( ) ,
198
+ cdc_scan. clone ( ) ,
199
+ table_partition_cols. clone ( ) ,
200
+ log_store. object_store ( ) ,
201
+ Some ( snapshot. table_config ( ) . target_file_size ( ) as usize ) ,
202
+ None ,
203
+ writer_properties,
204
+ false ,
205
+ None ,
206
+ writer_stats_config,
207
+ None ,
208
+ )
209
+ . await ?;
210
+ actions. extend ( cdc_actions)
211
+ }
212
+ _ => ( ) ,
213
+ } ;
184
214
185
215
let read_records = scan. parquet_scan . metrics ( ) . and_then ( |m| m. output_rows ( ) ) ;
186
216
let filter_records = filter. metrics ( ) . and_then ( |m| m. output_rows ( ) ) ;
@@ -189,7 +219,7 @@ async fn excute_non_empty_expr(
189
219
. zip ( filter_records)
190
220
. map ( |( read, filter) | read - filter) ;
191
221
192
- Ok ( add_actions )
222
+ Ok ( actions )
193
223
}
194
224
195
225
async fn execute (
@@ -209,7 +239,7 @@ async fn execute(
209
239
210
240
let predicate = predicate. unwrap_or ( Expr :: Literal ( ScalarValue :: Boolean ( Some ( true ) ) ) ) ;
211
241
212
- let add = if candidates. partition_scan {
242
+ let mut actions = if candidates. partition_scan {
213
243
Vec :: new ( )
214
244
} else {
215
245
let write_start = Instant :: now ( ) ;
@@ -233,7 +263,6 @@ async fn execute(
233
263
. unwrap ( )
234
264
. as_millis ( ) as i64 ;
235
265
236
- let mut actions: Vec < Action > = add. into_iter ( ) . map ( Action :: Add ) . collect ( ) ;
237
266
metrics. num_removed_files = remove. len ( ) ;
238
267
metrics. num_added_files = actions. len ( ) ;
239
268
0 commit comments