@@ -27,6 +27,7 @@ def __init__(self,
27
27
dend : datetime ,
28
28
execution_time : datetime ,
29
29
dry_run : bool ):
30
+ self .spillover_query = spillover_query
30
31
self .bigquery_service = bigquery_service
31
32
self .task_config = task_config
32
33
self .sql_query = sql_query
@@ -78,15 +79,26 @@ def transform(self):
78
79
elif bq_destination_table .partitioning_type == "DAY" :
79
80
partition_strategy = timedelta (days = 1 )
80
81
81
- # queries where source data/partition directly map with destination partitions
82
- transformation = MultiPartitionTransformation (self .bigquery_service ,
83
- self .task_config ,
84
- self .sql_query ,
85
- self .dstart , self .dend ,
86
- self .dry_run ,
87
- localised_execution_time ,
88
- partition_strategy ,
89
- self .task_config .concurrency )
82
+ if self .spillover_query :
83
+ transformation = LegacySpilloverTransformation (self .bigquery_service ,
84
+ self .task_config ,
85
+ self .sql_query ,
86
+ self .spillover_query ,
87
+ self .dstart ,
88
+ self .dend ,
89
+ self .dry_run ,
90
+ localised_execution_time ,
91
+ partition_strategy )
92
+ else :
93
+ # queries where source data/partition directly map with destination partitions
94
+ transformation = MultiPartitionTransformation (self .bigquery_service ,
95
+ self .task_config ,
96
+ self .sql_query ,
97
+ self .dstart , self .dend ,
98
+ self .dry_run ,
99
+ localised_execution_time ,
100
+ partition_strategy ,
101
+ self .task_config .concurrency )
90
102
else :
91
103
raise Exception ("unable to generate a transformation for request, unsupported partition strategy" )
92
104
transformation .transform ()
@@ -376,7 +388,11 @@ def transform(self):
376
388
# break query file
377
389
task_queries = self .task_query .split (OPTIMUS_QUERY_BREAK_MARKER )
378
390
if len (task_queries ) < len (datetime_list ):
379
- raise Exception ("query needs to be broken using {}, {} query found, needed {}\n {}" .format (OPTIMUS_QUERY_BREAK_MARKER , len (task_queries ), len (datetime_list ), self .task_query ))
391
+ raise Exception (
392
+ "query needs to be broken using {}, {} query found, needed {}\n {}" .format (OPTIMUS_QUERY_BREAK_MARKER ,
393
+ len (task_queries ),
394
+ len (datetime_list ),
395
+ self .task_query ))
380
396
381
397
tasks = []
382
398
query_index = 0
@@ -413,45 +429,59 @@ def __init__(self,
413
429
sql_query : str ,
414
430
spillover_query : str ,
415
431
start_time : datetime ,
432
+ end_time : datetime ,
416
433
dry_run : bool ,
417
- execution_time : datetime ):
434
+ execution_time : datetime ,
435
+ partition_delta : timedelta ):
418
436
self .bigquery_service = bigquery_service
419
437
self .task_config = task_config
420
438
self .sql_query = sql_query
421
439
self .spillover_query = spillover_query
422
440
self .dry_run = dry_run
423
441
self .start_time = start_time
442
+ self .end_time = end_time
424
443
self .execution_time = execution_time
444
+ self .partition_delta = partition_delta
425
445
426
446
self .concurrency = self .task_config .concurrency
427
447
428
448
def transform (self ):
429
449
datetime_list = []
430
- default_datetime = [self .start_time ]
431
- datetime_list .extend (default_datetime )
450
+ # default_datetime = [self.start_time]
451
+ # datetime_list.extend(default_datetime)
432
452
433
453
if self .task_config .use_spillover :
434
454
spillover = SpilloverDatetimes (self .bigquery_service ,
435
455
self .spillover_query ,
436
456
self .task_config ,
437
457
self .start_time ,
458
+ self .end_time ,
438
459
self .dry_run ,
439
460
self .execution_time )
440
461
spillover_datetimes = spillover .collect_datetimes ()
441
462
datetime_list .extend (spillover_datetimes )
442
463
443
464
datetime_list = distinct_list (datetime_list )
444
465
466
+ execute_for = self .start_time
467
+
468
+ # tables are partitioned for day
469
+ # iterate from start to end for each partition
470
+ while execute_for < self .end_time :
471
+ execute_for += self .partition_delta
472
+
445
473
tasks = []
446
474
for partition_time in datetime_list :
447
475
logger .info ("create transformation for partition: {}" .format (partition_time ))
448
476
loader = PartitionLoader (self .bigquery_service , self .task_config .destination_table ,
449
477
self .task_config .load_method , partition_time )
450
478
479
+ task_window = WindowFactory .create_window_with_time (partition_time , partition_time + self .partition_delta )
480
+
451
481
task = PartitionTransformation (self .task_config ,
452
482
loader ,
453
483
self .sql_query ,
454
- self . window ,
484
+ task_window ,
455
485
self .dry_run ,
456
486
self .execution_time )
457
487
tasks .append (task )
0 commit comments