/*
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.persistence.job;
import static io.airbyte.db.instance.jobs.jooq.generated.Tables.ATTEMPTS;
import static io.airbyte.db.instance.jobs.jooq.generated.Tables.JOBS;
import static io.airbyte.db.instance.jobs.jooq.generated.Tables.NORMALIZATION_SUMMARIES;
import static io.airbyte.db.instance.jobs.jooq.generated.Tables.STREAM_STATS;
import static io.airbyte.db.instance.jobs.jooq.generated.Tables.SYNC_STATS;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.JsonNodeType;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.collect.UnmodifiableIterator;
import io.airbyte.commons.enums.Enums;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.protocol.migrations.v1.CatalogMigrationV1Helper;
import io.airbyte.commons.resources.MoreResources;
import io.airbyte.commons.text.Names;
import io.airbyte.commons.text.Sqls;
import io.airbyte.commons.version.AirbyteProtocolVersion;
import io.airbyte.commons.version.AirbyteProtocolVersionRange;
import io.airbyte.commons.version.AirbyteVersion;
import io.airbyte.commons.version.Version;
import io.airbyte.config.AttemptFailureSummary;
import io.airbyte.config.AttemptSyncConfig;
import io.airbyte.config.FailureReason;
import io.airbyte.config.JobConfig;
import io.airbyte.config.JobConfig.ConfigType;
import io.airbyte.config.JobOutput;
import io.airbyte.config.JobOutput.OutputType;
import io.airbyte.config.NormalizationSummary;
import io.airbyte.config.StreamSyncStats;
import io.airbyte.config.SyncStats;
import io.airbyte.config.persistence.PersistenceHelpers;
import io.airbyte.db.Database;
import io.airbyte.db.ExceptionWrappingDatabase;
import io.airbyte.db.instance.jobs.JobsDatabaseSchema;
import io.airbyte.db.jdbc.JdbcUtils;
import io.airbyte.persistence.job.models.Attempt;
import io.airbyte.persistence.job.models.AttemptNormalizationStatus;
import io.airbyte.persistence.job.models.AttemptStatus;
import io.airbyte.persistence.job.models.AttemptWithJobInfo;
import io.airbyte.persistence.job.models.Job;
import io.airbyte.persistence.job.models.JobStatus;
import io.airbyte.persistence.job.models.JobWithStatusAndTimestamp;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.file.Path;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.jooq.DSLContext;
import org.jooq.Field;
import org.jooq.InsertValuesStepN;
import org.jooq.JSONB;
import org.jooq.Named;
import org.jooq.Record;
import org.jooq.RecordMapper;
import org.jooq.Result;
import org.jooq.Sequence;
import org.jooq.Table;
import org.jooq.conf.ParamType;
import org.jooq.impl.DSL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
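/**
 * jOOQ-backed implementation of {@link JobPersistence}. Encapsulates the SQL for creating jobs and
 * attempts, transitioning their statuses, persisting outputs and sync/stream stats, and querying
 * job history.
 */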
public class DefaultJobPersistence implements JobPersistence {
// not static because job history test case manipulates these.
private final int JOB_HISTORY_MINIMUM_AGE_IN_DAYS;
private final int JOB_HISTORY_MINIMUM_RECENCY;
private final int JOB_HISTORY_EXCESSIVE_NUMBER_OF_JOBS;
private static final Logger LOGGER = LoggerFactory.getLogger(DefaultJobPersistence.class);
public static final String ATTEMPT_NUMBER = "attempt_number";
private static final String JOB_ID = "job_id";
private static final String WHERE = "WHERE ";
private static final String AND = " AND ";
private static final String SCOPE_CLAUSE = "scope = ? AND ";
protected static final String DEFAULT_SCHEMA = "public";
private static final String BACKUP_SCHEMA = "import_backup";
public static final String DEPLOYMENT_ID_KEY = "deployment_id";
public static final String METADATA_KEY_COL = "key";
public static final String METADATA_VAL_COL = "value";
@VisibleForTesting
static final String BASE_JOB_SELECT_AND_JOIN = jobSelectAndJoin("jobs");
private static final String AIRBYTE_METADATA_TABLE = "airbyte_metadata";
public static final String ORDER_BY_JOB_TIME_ATTEMPT_TIME =
"ORDER BY jobs.created_at DESC, jobs.id DESC, attempts.created_at ASC, attempts.id ASC ";
public static final String ORDER_BY_JOB_CREATED_AT_DESC = "ORDER BY jobs.created_at DESC ";
public static final String LIMIT_1 = "LIMIT 1 ";
private static final String JOB_STATUS_IS_NON_TERMINAL = String.format("status IN (%s) ",
JobStatus.NON_TERMINAL_STATUSES.stream()
.map(Sqls::toSqlName)
.map(Names::singleQuote)
.collect(Collectors.joining(",")));
private final ExceptionWrappingDatabase jobDatabase;
private final Supplier<Instant> timeSupplier;
@VisibleForTesting
DefaultJobPersistence(final Database jobDatabase,
final Supplier<Instant> timeSupplier,
final int minimumAgeInDays,
final int excessiveNumberOfJobs,
final int minimumRecencyCount) {
this.jobDatabase = new ExceptionWrappingDatabase(jobDatabase);
this.timeSupplier = timeSupplier;
JOB_HISTORY_MINIMUM_AGE_IN_DAYS = minimumAgeInDays;
JOB_HISTORY_EXCESSIVE_NUMBER_OF_JOBS = excessiveNumberOfJobs;
JOB_HISTORY_MINIMUM_RECENCY = minimumRecencyCount;
}
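  /**
   * Creates an instance with the default job-history retention thresholds: a minimum age of 30
   * days, an excessive-job threshold of 500, and a minimum recency count of 10, all consumed by
   * the job-history purging logic.
   */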
public DefaultJobPersistence(final Database jobDatabase) {
this(jobDatabase, Instant::now, 30, 500, 10);
}
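  /**
   * Builds the shared SELECT that joins jobs to their attempts. The {@code jobsSubquery} argument
   * lets callers substitute a filtered or paginated subquery (aliased as "jobs") for the raw
   * table, as {@link #listJobs(Set, String, int, int)} does.
   */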
private static String jobSelectAndJoin(final String jobsSubquery) {
return "SELECT\n"
+ "jobs.id AS job_id,\n"
+ "jobs.config_type AS config_type,\n"
+ "jobs.scope AS scope,\n"
+ "jobs.config AS config,\n"
+ "jobs.status AS job_status,\n"
+ "jobs.started_at AS job_started_at,\n"
+ "jobs.created_at AS job_created_at,\n"
+ "jobs.updated_at AS job_updated_at,\n"
+ "attempts.attempt_number AS attempt_number,\n"
+ "attempts.attempt_sync_config AS attempt_sync_config,\n"
+ "attempts.log_path AS log_path,\n"
+ "attempts.output AS attempt_output,\n"
+ "attempts.status AS attempt_status,\n"
+ "attempts.processing_task_queue AS processing_task_queue,\n"
+ "attempts.failure_summary AS attempt_failure_summary,\n"
+ "attempts.created_at AS attempt_created_at,\n"
+ "attempts.updated_at AS attempt_updated_at,\n"
+ "attempts.ended_at AS attempt_ended_at\n"
+ "FROM " + jobsSubquery + " LEFT OUTER JOIN attempts ON jobs.id = attempts.job_id ";
}
/**
* @param scope This is the primary id of a standard sync (StandardSync#connectionId).
*/
@Override
public Optional<Long> enqueueJob(final String scope, final JobConfig jobConfig) throws IOException {
LOGGER.info("enqueuing pending job for scope: {}", scope);
final LocalDateTime now = LocalDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
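    // For replication-type jobs, guard against duplicates: the WHERE NOT EXISTS clause below makes
    // the INSERT a no-op while another replication job for the same scope is still non-terminal.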
final String queueingRequest = Job.REPLICATION_TYPES.contains(jobConfig.getConfigType())
? String.format("WHERE NOT EXISTS (SELECT 1 FROM jobs WHERE config_type IN (%s) AND scope = '%s' AND status NOT IN (%s)) ",
Job.REPLICATION_TYPES.stream().map(Sqls::toSqlName).map(Names::singleQuote).collect(Collectors.joining(",")),
scope,
JobStatus.TERMINAL_STATUSES.stream().map(Sqls::toSqlName).map(Names::singleQuote).collect(Collectors.joining(",")))
: "";
return jobDatabase.query(
ctx -> ctx.fetch(
"INSERT INTO jobs(config_type, scope, created_at, updated_at, status, config) " +
"SELECT CAST(? AS JOB_CONFIG_TYPE), ?, ?, ?, CAST(? AS JOB_STATUS), CAST(? as JSONB) " +
queueingRequest +
"RETURNING id ",
Sqls.toSqlName(jobConfig.getConfigType()),
scope,
now,
now,
Sqls.toSqlName(JobStatus.PENDING),
Jsons.serialize(jobConfig)))
.stream()
.findFirst()
.map(r -> r.getValue("id", Long.class));
}
@Override
public void resetJob(final long jobId) throws IOException {
final LocalDateTime now = LocalDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.query(ctx -> {
updateJobStatus(ctx, jobId, JobStatus.PENDING, now);
return null;
});
}
@Override
public void cancelJob(final long jobId) throws IOException {
final LocalDateTime now = LocalDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.query(ctx -> {
updateJobStatus(ctx, jobId, JobStatus.CANCELLED, now);
return null;
});
}
@Override
public void failJob(final long jobId) throws IOException {
final LocalDateTime now = LocalDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.query(ctx -> {
updateJobStatus(ctx, jobId, JobStatus.FAILED, now);
return null;
});
}
private void updateJobStatus(final DSLContext ctx, final long jobId, final JobStatus newStatus, final LocalDateTime now) {
final Job job = getJob(ctx, jobId);
if (job.isJobInTerminalState()) {
// If the job is already terminal, no need to set a new status
return;
}
job.validateStatusTransition(newStatus);
ctx.execute(
"UPDATE jobs SET status = CAST(? as JOB_STATUS), updated_at = ? WHERE id = ?",
Sqls.toSqlName(newStatus),
now,
jobId);
}
@Override
public int createAttempt(final long jobId, final Path logPath) throws IOException {
final LocalDateTime now = LocalDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
return jobDatabase.transaction(ctx -> {
final Job job = getJob(ctx, jobId);
if (job.isJobInTerminalState()) {
final var errMsg = String.format(
"Cannot create an attempt for a job id: %s that is in a terminal state: %s for connection id: %s",
job.getId(), job.getStatus(), job.getScope());
throw new IllegalStateException(errMsg);
}
if (job.hasRunningAttempt()) {
final var errMsg = String.format(
"Cannot create an attempt for a job id: %s that has a running attempt: %s for connection id: %s",
job.getId(), job.getStatus(), job.getScope());
throw new IllegalStateException(errMsg);
}
updateJobStatus(ctx, jobId, JobStatus.RUNNING, now);
// will fail if attempt number already exists for the job id.
return ctx.fetch(
"INSERT INTO attempts(job_id, attempt_number, log_path, status, created_at, updated_at) VALUES(?, ?, ?, CAST(? AS ATTEMPT_STATUS), ?, ?) RETURNING attempt_number",
jobId,
job.getAttemptsCount(),
logPath.toString(),
Sqls.toSqlName(AttemptStatus.RUNNING),
now,
now)
.stream()
.findFirst()
.map(r -> r.get(ATTEMPT_NUMBER, Integer.class))
          .orElseThrow(() -> new RuntimeException("No attempt_number was returned after inserting the attempt for job " + jobId));
});
}
@Override
public void failAttempt(final long jobId, final int attemptNumber) throws IOException {
final LocalDateTime now = LocalDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.transaction(ctx -> {
updateJobStatus(ctx, jobId, JobStatus.INCOMPLETE, now);
ctx.execute(
"UPDATE attempts SET status = CAST(? as ATTEMPT_STATUS), updated_at = ? , ended_at = ? WHERE job_id = ? AND attempt_number = ?",
Sqls.toSqlName(AttemptStatus.FAILED),
now,
now,
jobId,
attemptNumber);
return null;
});
}
@Override
public void succeedAttempt(final long jobId, final int attemptNumber) throws IOException {
final LocalDateTime now = LocalDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.transaction(ctx -> {
updateJobStatus(ctx, jobId, JobStatus.SUCCEEDED, now);
ctx.execute(
"UPDATE attempts SET status = CAST(? as ATTEMPT_STATUS), updated_at = ? , ended_at = ? WHERE job_id = ? AND attempt_number = ?",
Sqls.toSqlName(AttemptStatus.SUCCEEDED),
now,
now,
jobId,
attemptNumber);
return null;
});
}
@Override
public void setAttemptTemporalWorkflowInfo(final long jobId,
final int attemptNumber,
final String temporalWorkflowId,
final String processingTaskQueue)
throws IOException {
jobDatabase.query(ctx -> ctx.execute(
" UPDATE attempts SET temporal_workflow_id = ? , processing_task_queue = ? WHERE job_id = ? AND attempt_number = ?",
temporalWorkflowId,
processingTaskQueue,
jobId,
attemptNumber));
}
@Override
public Optional<String> getAttemptTemporalWorkflowId(final long jobId, final int attemptNumber) throws IOException {
final var result = jobDatabase.query(ctx -> ctx.fetch(
" SELECT temporal_workflow_id from attempts WHERE job_id = ? AND attempt_number = ?",
jobId,
attemptNumber)).stream().findFirst();
if (result.isEmpty() || result.get().get("temporal_workflow_id") == null) {
return Optional.empty();
}
return Optional.of(result.get().get("temporal_workflow_id", String.class));
}
@Override
public void writeOutput(final long jobId, final int attemptNumber, final JobOutput output)
throws IOException {
final OffsetDateTime now = OffsetDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.transaction(ctx -> {
ctx.update(ATTEMPTS)
.set(ATTEMPTS.OUTPUT, JSONB.valueOf(Jsons.serialize(output)))
.set(ATTEMPTS.UPDATED_AT, now)
.where(ATTEMPTS.JOB_ID.eq(jobId), ATTEMPTS.ATTEMPT_NUMBER.eq(attemptNumber))
.execute();
final Long attemptId = getAttemptId(jobId, attemptNumber, ctx);
final SyncStats syncStats = output.getSync().getStandardSyncSummary().getTotalStats();
if (syncStats != null) {
saveToSyncStatsTable(now, syncStats, attemptId, ctx);
}
final NormalizationSummary normalizationSummary = output.getSync().getNormalizationSummary();
if (normalizationSummary != null) {
ctx.insertInto(NORMALIZATION_SUMMARIES)
.set(NORMALIZATION_SUMMARIES.ID, UUID.randomUUID())
.set(NORMALIZATION_SUMMARIES.UPDATED_AT, now)
.set(NORMALIZATION_SUMMARIES.CREATED_AT, now)
.set(NORMALIZATION_SUMMARIES.ATTEMPT_ID, attemptId)
.set(NORMALIZATION_SUMMARIES.START_TIME,
OffsetDateTime.ofInstant(Instant.ofEpochMilli(normalizationSummary.getStartTime()), ZoneOffset.UTC))
.set(NORMALIZATION_SUMMARIES.END_TIME, OffsetDateTime.ofInstant(Instant.ofEpochMilli(normalizationSummary.getEndTime()), ZoneOffset.UTC))
.set(NORMALIZATION_SUMMARIES.FAILURES, JSONB.valueOf(Jsons.serialize(normalizationSummary.getFailures())))
.execute();
}
return null;
});
}
@Override
public void writeStats(final long jobId,
final int attemptNumber,
final long estimatedRecords,
final long estimatedBytes,
final long recordsEmitted,
final long bytesEmitted,
final List<StreamSyncStats> streamStats)
throws IOException {
final OffsetDateTime now = OffsetDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.transaction(ctx -> {
final var attemptId = getAttemptId(jobId, attemptNumber, ctx);
final var syncStats = new SyncStats()
.withEstimatedRecords(estimatedRecords)
.withEstimatedBytes(estimatedBytes)
.withRecordsEmitted(recordsEmitted)
.withBytesEmitted(bytesEmitted);
saveToSyncStatsTable(now, syncStats, attemptId, ctx);
saveToStreamStatsTable(now, streamStats, attemptId, ctx);
return null;
});
}
private static void saveToSyncStatsTable(final OffsetDateTime now, final SyncStats syncStats, final Long attemptId, final DSLContext ctx) {
    // Although jOOQ supports upserts via the onConflict statement, we cannot use it here: the
    // table currently has duplicate records and lacks the unique constraint on the attempt_id
    // column that jOOQ requires. We are forced to check for existence instead.
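    // For reference, if the table were deduplicated and a unique constraint added on attempt_id,
    // the existence check below could collapse into a single jOOQ upsert along these lines
    // (hypothetical sketch only; it cannot run against the current schema):
    //
    //   ctx.insertInto(SYNC_STATS)
    //       .set(SYNC_STATS.ID, UUID.randomUUID())
    //       .set(SYNC_STATS.ATTEMPT_ID, attemptId)
    //       .set(SYNC_STATS.CREATED_AT, now)
    //       .set(SYNC_STATS.UPDATED_AT, now)
    //       .set(SYNC_STATS.BYTES_EMITTED, syncStats.getBytesEmitted())
    //       // ... remaining stats columns ...
    //       .onConflict(SYNC_STATS.ATTEMPT_ID)
    //       .doUpdate()
    //       .set(SYNC_STATS.UPDATED_AT, now)
    //       .set(SYNC_STATS.BYTES_EMITTED, syncStats.getBytesEmitted())
    //       // ... remaining stats columns ...
    //       .execute();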
final var isExisting = ctx.fetchExists(SYNC_STATS, SYNC_STATS.ATTEMPT_ID.eq(attemptId));
if (isExisting) {
ctx.update(SYNC_STATS)
.set(SYNC_STATS.UPDATED_AT, now)
.set(SYNC_STATS.BYTES_EMITTED, syncStats.getBytesEmitted())
.set(SYNC_STATS.RECORDS_EMITTED, syncStats.getRecordsEmitted())
.set(SYNC_STATS.ESTIMATED_RECORDS, syncStats.getEstimatedRecords())
.set(SYNC_STATS.ESTIMATED_BYTES, syncStats.getEstimatedBytes())
.set(SYNC_STATS.RECORDS_COMMITTED, syncStats.getRecordsCommitted())
.set(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED, syncStats.getSourceStateMessagesEmitted())
.set(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED, syncStats.getDestinationStateMessagesEmitted())
.set(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMaxSecondsBeforeSourceStateMessageEmitted())
.set(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMeanSecondsBeforeSourceStateMessageEmitted())
.set(SYNC_STATS.MAX_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, syncStats.getMaxSecondsBetweenStateMessageEmittedandCommitted())
.set(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, syncStats.getMeanSecondsBetweenStateMessageEmittedandCommitted())
.where(SYNC_STATS.ATTEMPT_ID.eq(attemptId))
.execute();
return;
}
ctx.insertInto(SYNC_STATS)
.set(SYNC_STATS.ID, UUID.randomUUID())
.set(SYNC_STATS.CREATED_AT, now)
.set(SYNC_STATS.ATTEMPT_ID, attemptId)
.set(SYNC_STATS.UPDATED_AT, now)
.set(SYNC_STATS.BYTES_EMITTED, syncStats.getBytesEmitted())
.set(SYNC_STATS.RECORDS_EMITTED, syncStats.getRecordsEmitted())
.set(SYNC_STATS.ESTIMATED_RECORDS, syncStats.getEstimatedRecords())
.set(SYNC_STATS.ESTIMATED_BYTES, syncStats.getEstimatedBytes())
.set(SYNC_STATS.RECORDS_COMMITTED, syncStats.getRecordsCommitted())
.set(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED, syncStats.getSourceStateMessagesEmitted())
.set(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED, syncStats.getDestinationStateMessagesEmitted())
.set(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMaxSecondsBeforeSourceStateMessageEmitted())
.set(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED, syncStats.getMeanSecondsBeforeSourceStateMessageEmitted())
.set(SYNC_STATS.MAX_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, syncStats.getMaxSecondsBetweenStateMessageEmittedandCommitted())
.set(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED, syncStats.getMeanSecondsBetweenStateMessageEmittedandCommitted())
.execute();
}
private static void saveToStreamStatsTable(final OffsetDateTime now,
final List<StreamSyncStats> perStreamStats,
final Long attemptId,
final DSLContext ctx) {
Optional.ofNullable(perStreamStats).orElse(Collections.emptyList()).forEach(
streamStats -> {
// We cannot entirely rely on JOOQ's generated SQL for upserts as it does not support null fields
// for conflict detection. We are forced to separately check for existence.
final var stats = streamStats.getStats();
final var isExisting =
ctx.fetchExists(STREAM_STATS, STREAM_STATS.ATTEMPT_ID.eq(attemptId).and(STREAM_STATS.STREAM_NAME.eq(streamStats.getStreamName()))
.and(PersistenceHelpers.isNullOrEquals(STREAM_STATS.STREAM_NAMESPACE, streamStats.getStreamNamespace())));
if (isExisting) {
ctx.update(STREAM_STATS)
.set(STREAM_STATS.UPDATED_AT, now)
.set(STREAM_STATS.BYTES_EMITTED, stats.getBytesEmitted())
.set(STREAM_STATS.RECORDS_EMITTED, stats.getRecordsEmitted())
.set(STREAM_STATS.ESTIMATED_RECORDS, stats.getEstimatedRecords())
.set(STREAM_STATS.ESTIMATED_BYTES, stats.getEstimatedBytes())
              // Scope the update to this stream; matching on attempt_id alone would overwrite the
              // stats of every stream in the attempt.
              .where(STREAM_STATS.ATTEMPT_ID.eq(attemptId)
                  .and(STREAM_STATS.STREAM_NAME.eq(streamStats.getStreamName()))
                  .and(PersistenceHelpers.isNullOrEquals(STREAM_STATS.STREAM_NAMESPACE, streamStats.getStreamNamespace())))
.execute();
return;
}
ctx.insertInto(STREAM_STATS)
.set(STREAM_STATS.ID, UUID.randomUUID())
.set(STREAM_STATS.ATTEMPT_ID, attemptId)
.set(STREAM_STATS.STREAM_NAME, streamStats.getStreamName())
.set(STREAM_STATS.STREAM_NAMESPACE, streamStats.getStreamNamespace())
.set(STREAM_STATS.CREATED_AT, now)
.set(STREAM_STATS.UPDATED_AT, now)
.set(STREAM_STATS.BYTES_EMITTED, stats.getBytesEmitted())
.set(STREAM_STATS.RECORDS_EMITTED, stats.getRecordsEmitted())
.set(STREAM_STATS.ESTIMATED_BYTES, stats.getEstimatedBytes())
.set(STREAM_STATS.ESTIMATED_RECORDS, stats.getEstimatedRecords())
.execute();
});
}
@Override
public void writeAttemptSyncConfig(final long jobId, final int attemptNumber, final AttemptSyncConfig attemptSyncConfig) throws IOException {
final OffsetDateTime now = OffsetDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.transaction(
ctx -> ctx.update(ATTEMPTS)
.set(ATTEMPTS.ATTEMPT_SYNC_CONFIG, JSONB.valueOf(Jsons.serialize(attemptSyncConfig)))
.set(ATTEMPTS.UPDATED_AT, now)
.where(ATTEMPTS.JOB_ID.eq(jobId), ATTEMPTS.ATTEMPT_NUMBER.eq(attemptNumber))
.execute());
}
@Override
public void writeAttemptFailureSummary(final long jobId, final int attemptNumber, final AttemptFailureSummary failureSummary) throws IOException {
final OffsetDateTime now = OffsetDateTime.ofInstant(timeSupplier.get(), ZoneOffset.UTC);
jobDatabase.transaction(
ctx -> ctx.update(ATTEMPTS)
.set(ATTEMPTS.FAILURE_SUMMARY, JSONB.valueOf(Jsons.serialize(failureSummary)))
.set(ATTEMPTS.UPDATED_AT, now)
.where(ATTEMPTS.JOB_ID.eq(jobId), ATTEMPTS.ATTEMPT_NUMBER.eq(attemptNumber))
.execute());
}
@Override
public AttemptStats getAttemptStats(final long jobId, final int attemptNumber) throws IOException {
return jobDatabase
.query(ctx -> {
final Long attemptId = getAttemptId(jobId, attemptNumber, ctx);
final var syncStats = ctx.select(DSL.asterisk()).from(SYNC_STATS).where(SYNC_STATS.ATTEMPT_ID.eq(attemptId))
.orderBy(SYNC_STATS.UPDATED_AT.desc())
.fetchOne(getSyncStatsRecordMapper());
final var perStreamStats = ctx.select(DSL.asterisk()).from(STREAM_STATS).where(STREAM_STATS.ATTEMPT_ID.eq(attemptId))
.fetch(getStreamStatsRecordsMapper());
return new AttemptStats(syncStats, perStreamStats);
});
}
@Override
public Map<JobAttemptPair, AttemptStats> getAttemptStats(final List<Long> jobIds) throws IOException {
if (jobIds == null || jobIds.isEmpty()) {
return Map.of();
}
final var jobIdsStr = StringUtils.join(jobIds, ',');
return jobDatabase.query(ctx -> {
      // Instead of one massive join, split this into two queries for better readability, for now.
      // We can combine them later if this approach proves insufficiently efficient.
final Map<JobAttemptPair, AttemptStats> attemptStats = hydrateSyncStats(jobIdsStr, ctx);
hydrateStreamStats(jobIdsStr, ctx, attemptStats);
return attemptStats;
});
}
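  /**
   * Fetches the sync_stats rows for the given job ids and seeds the result map, keyed by
   * (job id, attempt number). {@link #hydrateStreamStats(String, DSLContext, Map)} then fills in
   * the per-stream entries.
   */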
private static Map<JobAttemptPair, AttemptStats> hydrateSyncStats(final String jobIdsStr, final DSLContext ctx) {
final var attemptStats = new HashMap<JobAttemptPair, AttemptStats>();
final var syncResults = ctx.fetch(
"SELECT atmpt.attempt_number, atmpt.job_id,"
+ "stats.estimated_bytes, stats.estimated_records, stats.bytes_emitted, stats.records_emitted, stats.records_committed "
+ "FROM sync_stats stats "
+ "INNER JOIN attempts atmpt ON stats.attempt_id = atmpt.id "
+ "WHERE job_id IN ( " + jobIdsStr + ");");
syncResults.forEach(r -> {
final var key = new JobAttemptPair(r.get(ATTEMPTS.JOB_ID), r.get(ATTEMPTS.ATTEMPT_NUMBER));
final var syncStats = new SyncStats()
.withBytesEmitted(r.get(SYNC_STATS.BYTES_EMITTED))
.withRecordsEmitted(r.get(SYNC_STATS.RECORDS_EMITTED))
.withRecordsCommitted(r.get(SYNC_STATS.RECORDS_COMMITTED))
.withEstimatedRecords(r.get(SYNC_STATS.ESTIMATED_RECORDS))
.withEstimatedBytes(r.get(SYNC_STATS.ESTIMATED_BYTES));
attemptStats.put(key, new AttemptStats(syncStats, Lists.newArrayList()));
});
return attemptStats;
}
/**
   * This method must be called after
   * {@link DefaultJobPersistence#hydrateSyncStats(String, DSLContext)}, as it assumes
   * hydrateSyncStats has prepopulated the map.
*/
private static void hydrateStreamStats(final String jobIdsStr, final DSLContext ctx, final Map<JobAttemptPair, AttemptStats> attemptStats) {
final var streamResults = ctx.fetch(
"SELECT atmpt.attempt_number, atmpt.job_id, "
+ "stats.stream_name, stats.stream_namespace, stats.estimated_bytes, stats.estimated_records, stats.bytes_emitted, stats.records_emitted "
+ "FROM stream_stats stats "
+ "INNER JOIN attempts atmpt ON atmpt.id = stats.attempt_id "
+ "WHERE attempt_id IN "
+ "( SELECT id FROM attempts WHERE job_id IN ( " + jobIdsStr + "));");
streamResults.forEach(r -> {
final var streamSyncStats = new StreamSyncStats()
.withStreamNamespace(r.get(STREAM_STATS.STREAM_NAMESPACE))
.withStreamName(r.get(STREAM_STATS.STREAM_NAME))
.withStats(new SyncStats()
.withBytesEmitted(r.get(STREAM_STATS.BYTES_EMITTED))
.withRecordsEmitted(r.get(STREAM_STATS.RECORDS_EMITTED))
.withEstimatedRecords(r.get(STREAM_STATS.ESTIMATED_RECORDS))
.withEstimatedBytes(r.get(STREAM_STATS.ESTIMATED_BYTES)));
final var key = new JobAttemptPair(r.get(ATTEMPTS.JOB_ID), r.get(ATTEMPTS.ATTEMPT_NUMBER));
if (!attemptStats.containsKey(key)) {
        LOGGER.error("{} stream stats entry does not have a corresponding sync stats entry. This suggests the database is in a bad state.", key);
return;
}
attemptStats.get(key).perStreamStats().add(streamSyncStats);
});
}
@Override
public List<NormalizationSummary> getNormalizationSummary(final long jobId, final int attemptNumber) throws IOException {
return jobDatabase
.query(ctx -> {
final Long attemptId = getAttemptId(jobId, attemptNumber, ctx);
return ctx.select(DSL.asterisk()).from(NORMALIZATION_SUMMARIES).where(NORMALIZATION_SUMMARIES.ATTEMPT_ID.eq(attemptId))
.fetch(getNormalizationSummaryRecordMapper())
.stream()
.toList();
});
}
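  /**
   * Resolves the attempts.id primary key for the given (jobId, attemptNumber) pair, or -1 if no
   * such attempt exists.
   */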
@VisibleForTesting
static Long getAttemptId(final long jobId, final int attemptNumber, final DSLContext ctx) {
final Optional<Record> record =
ctx.fetch("SELECT id from attempts where job_id = ? AND attempt_number = ?", jobId,
attemptNumber).stream().findFirst();
if (record.isEmpty()) {
return -1L;
}
return record.get().get("id", Long.class);
}
private static RecordMapper<Record, SyncStats> getSyncStatsRecordMapper() {
return record -> new SyncStats().withBytesEmitted(record.get(SYNC_STATS.BYTES_EMITTED)).withRecordsEmitted(record.get(SYNC_STATS.RECORDS_EMITTED))
.withEstimatedBytes(record.get(SYNC_STATS.ESTIMATED_BYTES)).withEstimatedRecords(record.get(SYNC_STATS.ESTIMATED_RECORDS))
.withSourceStateMessagesEmitted(record.get(SYNC_STATS.SOURCE_STATE_MESSAGES_EMITTED))
.withDestinationStateMessagesEmitted(record.get(SYNC_STATS.DESTINATION_STATE_MESSAGES_EMITTED))
.withRecordsCommitted(record.get(SYNC_STATS.RECORDS_COMMITTED))
.withMeanSecondsBeforeSourceStateMessageEmitted(record.get(SYNC_STATS.MEAN_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED))
.withMaxSecondsBeforeSourceStateMessageEmitted(record.get(SYNC_STATS.MAX_SECONDS_BEFORE_SOURCE_STATE_MESSAGE_EMITTED))
.withMeanSecondsBetweenStateMessageEmittedandCommitted(record.get(SYNC_STATS.MEAN_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED))
.withMaxSecondsBetweenStateMessageEmittedandCommitted(record.get(SYNC_STATS.MAX_SECONDS_BETWEEN_STATE_MESSAGE_EMITTED_AND_COMMITTED));
}
private static RecordMapper<Record, StreamSyncStats> getStreamStatsRecordsMapper() {
return record -> {
final var stats = new SyncStats()
.withEstimatedRecords(record.get(STREAM_STATS.ESTIMATED_RECORDS)).withEstimatedBytes(record.get(STREAM_STATS.ESTIMATED_BYTES))
.withRecordsEmitted(record.get(STREAM_STATS.RECORDS_EMITTED)).withBytesEmitted(record.get(STREAM_STATS.BYTES_EMITTED));
return new StreamSyncStats()
.withStreamName(record.get(STREAM_STATS.STREAM_NAME)).withStreamNamespace(record.get(STREAM_STATS.STREAM_NAMESPACE))
.withStats(stats);
};
}
private static RecordMapper<Record, NormalizationSummary> getNormalizationSummaryRecordMapper() {
return record -> {
try {
return new NormalizationSummary().withStartTime(record.get(NORMALIZATION_SUMMARIES.START_TIME).toInstant().toEpochMilli())
.withEndTime(record.get(NORMALIZATION_SUMMARIES.END_TIME).toInstant().toEpochMilli())
.withFailures(record.get(NORMALIZATION_SUMMARIES.FAILURES, String.class) == null ? null : deserializeFailureReasons(record));
} catch (final JsonProcessingException e) {
throw new RuntimeException(e);
}
};
}
private static List<FailureReason> deserializeFailureReasons(final Record record) throws JsonProcessingException {
final ObjectMapper mapper = new ObjectMapper();
return List.of(mapper.readValue(String.valueOf(record.get(NORMALIZATION_SUMMARIES.FAILURES)), FailureReason[].class));
}
@Override
public Job getJob(final long jobId) throws IOException {
return jobDatabase.query(ctx -> getJob(ctx, jobId));
}
private Job getJob(final DSLContext ctx, final long jobId) {
return getJobOptional(ctx, jobId).orElseThrow(() -> new RuntimeException("Could not find job with id: " + jobId));
}
private Optional<Job> getJobOptional(final DSLContext ctx, final long jobId) {
return getJobFromResult(ctx.fetch(BASE_JOB_SELECT_AND_JOIN + "WHERE jobs.id = ?", jobId));
}
@Override
public Long getJobCount(final Set<ConfigType> configTypes, final String connectionId) throws IOException {
return jobDatabase.query(ctx -> ctx.selectCount().from(JOBS)
.where(JOBS.CONFIG_TYPE.in(Sqls.toSqlNames(configTypes)))
.and(JOBS.SCOPE.eq(connectionId))
.fetchOne().into(Long.class));
}
@Override
public List<Job> listJobs(final ConfigType configType, final String configId, final int pagesize, final int offset) throws IOException {
return listJobs(Set.of(configType), configId, pagesize, offset);
}
@Override
public List<Job> listJobs(final Set<ConfigType> configTypes, final String configId, final int pagesize, final int offset) throws IOException {
return jobDatabase.query(ctx -> {
final String jobsSubquery = "(" + ctx.select(DSL.asterisk()).from(JOBS)
.where(JOBS.CONFIG_TYPE.in(Sqls.toSqlNames(configTypes)))
.and(JOBS.SCOPE.eq(configId))
.orderBy(JOBS.CREATED_AT.desc(), JOBS.ID.desc())
.limit(pagesize)
.offset(offset)
.getSQL(ParamType.INLINED) + ") AS jobs";
return getJobsFromResult(ctx.fetch(jobSelectAndJoin(jobsSubquery) + ORDER_BY_JOB_TIME_ATTEMPT_TIME));
});
}
@Override
public List<Job> listJobsIncludingId(final Set<ConfigType> configTypes, final String connectionId, final long includingJobId, final int pagesize)
throws IOException {
final Optional<OffsetDateTime> includingJobCreatedAt = jobDatabase.query(ctx -> ctx.select(JOBS.CREATED_AT).from(JOBS)
.where(JOBS.CONFIG_TYPE.in(Sqls.toSqlNames(configTypes)))
.and(JOBS.SCOPE.eq(connectionId))
.and(JOBS.ID.eq(includingJobId))
.stream()
.findFirst()
.map(record -> record.get(JOBS.CREATED_AT, OffsetDateTime.class)));
if (includingJobCreatedAt.isEmpty()) {
return List.of();
}
final int countIncludingJob = jobDatabase.query(ctx -> ctx.selectCount().from(JOBS)
.where(JOBS.CONFIG_TYPE.in(Sqls.toSqlNames(configTypes)))
.and(JOBS.SCOPE.eq(connectionId))
.and(JOBS.CREATED_AT.greaterOrEqual(includingJobCreatedAt.get()))
.fetchOne().into(int.class));
// calculate the multiple of `pagesize` that includes the target job
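    // e.g. with pagesize = 10 and countIncludingJob = 25: (25 / 10 + 1) * 10 = 30, so requesting
    // the first 30 jobs is guaranteed to include the target job.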
final int pageSizeThatIncludesJob = (countIncludingJob / pagesize + 1) * pagesize;
return listJobs(configTypes, connectionId, pageSizeThatIncludesJob, 0);
}
@Override
public List<Job> listJobsWithStatus(final JobStatus status) throws IOException {
return listJobsWithStatus(Sets.newHashSet(ConfigType.values()), status);
}
@Override
public List<Job> listJobsWithStatus(final Set<ConfigType> configTypes, final JobStatus status) throws IOException {
return jobDatabase.query(ctx -> getJobsFromResult(ctx
.fetch(BASE_JOB_SELECT_AND_JOIN + WHERE +
"CAST(config_type AS VARCHAR) IN " + Sqls.toSqlInFragment(configTypes) + AND +
"CAST(jobs.status AS VARCHAR) = ? " +
ORDER_BY_JOB_TIME_ATTEMPT_TIME,
Sqls.toSqlName(status))));
}
@Override
public List<Job> listJobsWithStatus(final ConfigType configType, final JobStatus status) throws IOException {
return listJobsWithStatus(Sets.newHashSet(configType), status);
}
@Override
public List<Job> listJobsForConnectionWithStatuses(final UUID connectionId, final Set<ConfigType> configTypes, final Set<JobStatus> statuses)
throws IOException {
return jobDatabase.query(ctx -> getJobsFromResult(ctx
.fetch(BASE_JOB_SELECT_AND_JOIN + WHERE +
SCOPE_CLAUSE +
"config_type IN " + Sqls.toSqlInFragment(configTypes) + AND +
"jobs.status IN " + Sqls.toSqlInFragment(statuses) + " " +
ORDER_BY_JOB_TIME_ATTEMPT_TIME,
connectionId.toString())));
}
@Override
public List<JobWithStatusAndTimestamp> listJobStatusAndTimestampWithConnection(final UUID connectionId,
final Set<ConfigType> configTypes,
final Instant jobCreatedAtTimestamp)
throws IOException {
final LocalDateTime timeConvertedIntoLocalDateTime = LocalDateTime.ofInstant(jobCreatedAtTimestamp, ZoneOffset.UTC);
    final String jobStatusSelect = "SELECT id, status, created_at, updated_at FROM jobs ";
    return jobDatabase.query(ctx -> ctx
        .fetch(jobStatusSelect + WHERE +
SCOPE_CLAUSE +
"CAST(config_type AS VARCHAR) in " + Sqls.toSqlInFragment(configTypes) + AND +
"created_at >= ? ORDER BY created_at DESC", connectionId.toString(), timeConvertedIntoLocalDateTime))
.stream()
.map(r -> new JobWithStatusAndTimestamp(
r.get("id", Long.class),
JobStatus.valueOf(r.get("status", String.class).toUpperCase()),
r.get("created_at", Long.class) / 1000,
r.get("updated_at", Long.class) / 1000))
.toList();
}
@Override
public Optional<Job> getLastReplicationJob(final UUID connectionId) throws IOException {
return jobDatabase.query(ctx -> ctx
.fetch(BASE_JOB_SELECT_AND_JOIN + WHERE +
"CAST(jobs.config_type AS VARCHAR) in " + Sqls.toSqlInFragment(Job.REPLICATION_TYPES) + AND +
SCOPE_CLAUSE +
"CAST(jobs.status AS VARCHAR) <> ? " +
ORDER_BY_JOB_CREATED_AT_DESC + LIMIT_1,
connectionId.toString(),
Sqls.toSqlName(JobStatus.CANCELLED))
.stream()
.findFirst()
.flatMap(r -> getJobOptional(ctx, r.get(JOB_ID, Long.class))));
}
@Override
public Optional<Job> getLastSyncJob(final UUID connectionId) throws IOException {
return jobDatabase.query(ctx -> ctx
.fetch(BASE_JOB_SELECT_AND_JOIN + WHERE +
"CAST(jobs.config_type AS VARCHAR) = ? " + AND +
"scope = ? " +
ORDER_BY_JOB_CREATED_AT_DESC + LIMIT_1,
Sqls.toSqlName(ConfigType.SYNC),
connectionId.toString())
.stream()
.findFirst()
.flatMap(r -> getJobOptional(ctx, r.get(JOB_ID, Long.class))));
}
/**
* For each connection ID in the input, find that connection's latest sync job and return it if one
* exists.
*/
@Override
public List<Job> getLastSyncJobForConnections(final List<UUID> connectionIds) throws IOException {
if (connectionIds.isEmpty()) {
return Collections.emptyList();
}
return jobDatabase.query(ctx -> ctx
.fetch("SELECT DISTINCT ON (scope) * FROM jobs "
+ WHERE + "CAST(jobs.config_type AS VARCHAR) = ? "
+ AND + scopeInList(connectionIds)
+ "ORDER BY scope, created_at DESC",
Sqls.toSqlName(ConfigType.SYNC))
.stream()
.flatMap(r -> getJobOptional(ctx, r.get("id", Long.class)).stream())
.collect(Collectors.toList()));
}
/**
* For each connection ID in the input, find that connection's most recent non-terminal sync job and
* return it if one exists.
*/
@Override
public List<Job> getRunningSyncJobForConnections(final List<UUID> connectionIds) throws IOException {
if (connectionIds.isEmpty()) {
return Collections.emptyList();
}
return jobDatabase.query(ctx -> ctx
.fetch("SELECT DISTINCT ON (scope) * FROM jobs "
+ WHERE + "CAST(jobs.config_type AS VARCHAR) = ? "
+ AND + scopeInList(connectionIds)
+ AND + JOB_STATUS_IS_NON_TERMINAL
+ "ORDER BY scope, created_at DESC",
Sqls.toSqlName(ConfigType.SYNC))
.stream()
.flatMap(r -> getJobOptional(ctx, r.get("id", Long.class)).stream())
.collect(Collectors.toList()));
}
private String scopeInList(final Collection<UUID> connectionIds) {
return String.format("scope IN (%s) ",
connectionIds.stream()
.map(UUID::toString)
.map(Names::singleQuote)
.collect(Collectors.joining(",")));
}
@Override
public Optional<Job> getFirstReplicationJob(final UUID connectionId) throws IOException {
return jobDatabase.query(ctx -> ctx
.fetch(BASE_JOB_SELECT_AND_JOIN + WHERE +
"CAST(jobs.config_type AS VARCHAR) in " + Sqls.toSqlInFragment(Job.REPLICATION_TYPES) + AND +
SCOPE_CLAUSE +
"CAST(jobs.status AS VARCHAR) <> ? " +
"ORDER BY jobs.created_at ASC LIMIT 1",
connectionId.toString(),
Sqls.toSqlName(JobStatus.CANCELLED))
.stream()
.findFirst()
.flatMap(r -> getJobOptional(ctx, r.get(JOB_ID, Long.class))));
}
@Override
public Optional<Job> getNextJob() throws IOException {
// rules:
// 1. get oldest, pending job
// 2. job is excluded if another job of the same scope is already running
// 3. job is excluded if another job of the same scope is already incomplete
return jobDatabase.query(ctx -> ctx
.fetch(BASE_JOB_SELECT_AND_JOIN + WHERE +
"CAST(jobs.status AS VARCHAR) = 'pending' AND " +
"jobs.scope NOT IN ( SELECT scope FROM jobs WHERE status = 'running' OR status = 'incomplete' ) " +
"ORDER BY jobs.created_at ASC LIMIT 1")
.stream()
.findFirst()
.flatMap(r -> getJobOptional(ctx, r.get(JOB_ID, Long.class))));
}
@Override
public List<Job> listJobs(final ConfigType configType, final Instant attemptEndedAtTimestamp) throws IOException {
final LocalDateTime timeConvertedIntoLocalDateTime = LocalDateTime.ofInstant(attemptEndedAtTimestamp, ZoneOffset.UTC);
return jobDatabase.query(ctx -> getJobsFromResult(ctx
.fetch(BASE_JOB_SELECT_AND_JOIN + WHERE +
"CAST(config_type AS VARCHAR) = ? AND " +
" attempts.ended_at > ? ORDER BY jobs.created_at ASC, attempts.created_at ASC", Sqls.toSqlName(configType),
timeConvertedIntoLocalDateTime)));
}
@Override
public List<AttemptWithJobInfo> listAttemptsWithJobInfo(final ConfigType configType, final Instant attemptEndedAtTimestamp) throws IOException {
final LocalDateTime timeConvertedIntoLocalDateTime = LocalDateTime.ofInstant(attemptEndedAtTimestamp, ZoneOffset.UTC);
return jobDatabase.query(ctx -> getAttemptsWithJobsFromResult(ctx.fetch(
BASE_JOB_SELECT_AND_JOIN + WHERE + "CAST(config_type AS VARCHAR) = ? AND " + " attempts.ended_at > ? ORDER BY attempts.ended_at ASC",
Sqls.toSqlName(configType),
timeConvertedIntoLocalDateTime)));
}
@Override
public List<AttemptNormalizationStatus> getAttemptNormalizationStatusesForJob(final Long jobId) throws IOException {
return jobDatabase
.query(ctx -> ctx.select(ATTEMPTS.ATTEMPT_NUMBER, SYNC_STATS.RECORDS_COMMITTED, NORMALIZATION_SUMMARIES.FAILURES)
.from(ATTEMPTS)
.join(SYNC_STATS).on(SYNC_STATS.ATTEMPT_ID.eq(ATTEMPTS.ID))
.leftJoin(NORMALIZATION_SUMMARIES).on(NORMALIZATION_SUMMARIES.ATTEMPT_ID.eq(ATTEMPTS.ID))
.where(ATTEMPTS.JOB_ID.eq(jobId))
.fetch(record -> new AttemptNormalizationStatus(record.get(ATTEMPTS.ATTEMPT_NUMBER),
                Optional.ofNullable(record.get(SYNC_STATS.RECORDS_COMMITTED)), record.get(NORMALIZATION_SUMMARIES.FAILURES) != null)));
}
// Retrieves only Job information from the record, without any attempt info
private static Job getJobFromRecord(final Record record) {
return new Job(record.get(JOB_ID, Long.class),
Enums.toEnum(record.get("config_type", String.class), ConfigType.class).orElseThrow(),
record.get("scope", String.class),
parseJobConfigFromString(record.get("config", String.class)),
new ArrayList<Attempt>(),
JobStatus.valueOf(record.get("job_status", String.class).toUpperCase()),
        Optional.ofNullable(record.get("job_started_at")).map(value -> getEpoch(record, "job_started_at")).orElse(null),
getEpoch(record, "job_created_at"),
getEpoch(record, "job_updated_at"));
}
private static JobConfig parseJobConfigFromString(final String jobConfigString) {
final JobConfig jobConfig = Jsons.deserialize(jobConfigString, JobConfig.class);
// On-the-fly migration of persisted data types related objects (protocol v0->v1)
if (jobConfig.getConfigType() == ConfigType.SYNC && jobConfig.getSync() != null) {
// TODO feature flag this for data types rollout
// CatalogMigrationV1Helper.upgradeSchemaIfNeeded(jobConfig.getSync().getConfiguredAirbyteCatalog());
CatalogMigrationV1Helper.downgradeSchemaIfNeeded(jobConfig.getSync().getConfiguredAirbyteCatalog());
} else if (jobConfig.getConfigType() == ConfigType.RESET_CONNECTION && jobConfig.getResetConnection() != null) {
// TODO feature flag this for data types rollout
// CatalogMigrationV1Helper.upgradeSchemaIfNeeded(jobConfig.getResetConnection().getConfiguredAirbyteCatalog());
CatalogMigrationV1Helper.downgradeSchemaIfNeeded(jobConfig.getResetConnection().getConfiguredAirbyteCatalog());
}
return jobConfig;
}
private static Attempt getAttemptFromRecord(final Record record) {
final String attemptOutputString = record.get("attempt_output", String.class);
return new Attempt(
record.get(ATTEMPT_NUMBER, int.class),
record.get(JOB_ID, Long.class),
Path.of(record.get("log_path", String.class)),
record.get("attempt_sync_config", String.class) == null ? null
: Jsons.deserialize(record.get("attempt_sync_config", String.class), AttemptSyncConfig.class),
attemptOutputString == null ? null : parseJobOutputFromString(attemptOutputString),
Enums.toEnum(record.get("attempt_status", String.class), AttemptStatus.class).orElseThrow(),
record.get("processing_task_queue", String.class),
record.get("attempt_failure_summary", String.class) == null ? null
: Jsons.deserialize(record.get("attempt_failure_summary", String.class), AttemptFailureSummary.class),
getEpoch(record, "attempt_created_at"),
getEpoch(record, "attempt_updated_at"),
Optional.ofNullable(record.get("attempt_ended_at"))
.map(value -> getEpoch(record, "attempt_ended_at"))
.orElse(null));
}
private static JobOutput parseJobOutputFromString(final String jobOutputString) {
final JobOutput jobOutput = Jsons.deserialize(jobOutputString, JobOutput.class);
// On-the-fly migration of persisted data types related objects (protocol v0->v1)
if (jobOutput.getOutputType() == OutputType.DISCOVER_CATALOG && jobOutput.getDiscoverCatalog() != null) {
// TODO feature flag this for data types rollout
// CatalogMigrationV1Helper.upgradeSchemaIfNeeded(jobOutput.getDiscoverCatalog().getCatalog());
CatalogMigrationV1Helper.downgradeSchemaIfNeeded(jobOutput.getDiscoverCatalog().getCatalog());
} else if (jobOutput.getOutputType() == OutputType.SYNC && jobOutput.getSync() != null) {
// TODO feature flag this for data types rollout
// CatalogMigrationV1Helper.upgradeSchemaIfNeeded(jobOutput.getSync().getOutputCatalog());
CatalogMigrationV1Helper.downgradeSchemaIfNeeded(jobOutput.getSync().getOutputCatalog());
}
return jobOutput;
}
private static List<AttemptWithJobInfo> getAttemptsWithJobsFromResult(final Result<Record> result) {
return result
.stream()
.filter(record -> record.getValue(ATTEMPT_NUMBER) != null)
.map(record -> new AttemptWithJobInfo(getAttemptFromRecord(record), getJobFromRecord(record)))
.collect(Collectors.toList());
}