Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API,Core: Add scan planning metrics for scanned/skipped delete manifests #5792

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ interface ScanMetricsResult {
@Nullable
CounterResult skippedDeleteFiles();

@Nullable
CounterResult scannedDeleteManifests();

@Nullable
CounterResult skippedDeleteManifests();

static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
Preconditions.checkArgument(null != scanMetrics, "Invalid scan metrics: null");
return ImmutableScanMetricsResult.builder()
Expand All @@ -146,6 +152,8 @@ static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
CounterResult.fromCounter(scanMetrics.totalDeleteFileSizeInBytes()))
.skippedDataFiles(CounterResult.fromCounter(scanMetrics.skippedDataFiles()))
.skippedDeleteFiles(CounterResult.fromCounter(scanMetrics.skippedDeleteFiles()))
.scannedDeleteManifests(CounterResult.fromCounter(scanMetrics.scannedDeleteManifests()))
.skippedDeleteManifests(CounterResult.fromCounter(scanMetrics.skippedDeleteManifests()))
.build();
}
}
Expand All @@ -157,11 +165,13 @@ abstract class ScanMetrics {
public static final String RESULT_DATA_FILES = "result-data-files";
public static final String RESULT_DELETE_FILES = "result-delete-files";
public static final String SCANNED_DATA_MANIFESTS = "scanned-data-manifests";
public static final String SCANNED_DELETE_MANIFESTS = "scanned-delete-manifests";
public static final String TOTAL_DATA_MANIFESTS = "total-data-manifests";
public static final String TOTAL_DELETE_MANIFESTS = "total-delete-manifests";
public static final String TOTAL_FILE_SIZE_IN_BYTES = "total-file-size-in-bytes";
public static final String TOTAL_DELETE_FILE_SIZE_IN_BYTES = "total-delete-file-size-in-bytes";
public static final String SKIPPED_DATA_MANIFESTS = "skipped-data-manifests";
public static final String SKIPPED_DELETE_MANIFESTS = "skipped-delete-manifests";
public static final String SKIPPED_DATA_FILES = "skipped-data-files";
public static final String SKIPPED_DELETE_FILES = "skipped-delete-files";

Expand Down Expand Up @@ -226,6 +236,16 @@ public Counter skippedDeleteFiles() {
return metricsContext().counter(SKIPPED_DELETE_FILES, MetricsContext.Unit.COUNT);
}

@Value.Derived
public Counter scannedDeleteManifests() {
return metricsContext().counter(SCANNED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should make COUNT the default

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created #5912 to do this

}

@Value.Derived
public Counter skippedDeleteManifests() {
return metricsContext().counter(SKIPPED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
}

public static ScanMetrics of(MetricsContext metricsContext) {
return ImmutableScanMetrics.builder().metricsContext(metricsContext).build();
}
Expand Down
13 changes: 9 additions & 4 deletions core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -529,16 +529,21 @@ private Iterable<CloseableIterable<ManifestEntry<DeleteFile>>> deleteManifestRea
caseSensitive);
});

Iterable<ManifestFile> matchingManifests =
CloseableIterable<ManifestFile> closeableDeleteManifests =
CloseableIterable.withNoopClose(deleteManifests);
CloseableIterable<ManifestFile> matchingManifests =
evalCache == null
? deleteManifests
: Iterables.filter(
deleteManifests,
? closeableDeleteManifests
: CloseableIterable.filter(
scanMetrics.skippedDeleteManifests(),
closeableDeleteManifests,
manifest ->
manifest.content() == ManifestContent.DELETES
&& (manifest.hasAddedFiles() || manifest.hasExistingFiles())
&& evalCache.get(manifest.partitionSpecId()).eval(manifest));

matchingManifests =
CloseableIterable.count(scanMetrics.scannedDeleteManifests(), matchingManifests);
return Iterables.transform(
matchingManifests,
manifest ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ static String toJson(ScanMetricsResult metrics, boolean pretty) {
return JsonUtil.generate(gen -> toJson(metrics, gen), pretty);
}

@SuppressWarnings("checkstyle:CyclomaticComplexity")
static void toJson(ScanMetricsResult metrics, JsonGenerator gen) throws IOException {
Preconditions.checkArgument(null != metrics, "Invalid scan metrics: null");

Expand Down Expand Up @@ -97,6 +98,16 @@ static void toJson(ScanMetricsResult metrics, JsonGenerator gen) throws IOExcept
CounterResultParser.toJson(metrics.skippedDeleteFiles(), gen);
}

if (null != metrics.scannedDeleteManifests()) {
gen.writeFieldName(ScanMetrics.SCANNED_DELETE_MANIFESTS);
CounterResultParser.toJson(metrics.scannedDeleteManifests(), gen);
}

if (null != metrics.skippedDeleteManifests()) {
gen.writeFieldName(ScanMetrics.SKIPPED_DELETE_MANIFESTS);
CounterResultParser.toJson(metrics.skippedDeleteManifests(), gen);
}

gen.writeEndObject();
}

Expand Down Expand Up @@ -127,6 +138,10 @@ static ScanMetricsResult fromJson(JsonNode json) {
CounterResultParser.fromJson(ScanMetrics.TOTAL_DELETE_FILE_SIZE_IN_BYTES, json))
.skippedDataFiles(CounterResultParser.fromJson(ScanMetrics.SKIPPED_DATA_FILES, json))
.skippedDeleteFiles(CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_FILES, json))
.scannedDeleteManifests(
CounterResultParser.fromJson(ScanMetrics.SCANNED_DELETE_MANIFESTS, json))
.skippedDeleteManifests(
CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_MANIFESTS, json))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,15 @@ public void scanningWithMultipleDataManifests() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(3);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(2);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(30L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);

// we should hit only a single data manifest and only a single data file
try (CloseableIterable<FileScanTask> fileScanTasks =
Expand All @@ -88,11 +92,15 @@ public void scanningWithMultipleDataManifests() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(1);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
}

@Test
Expand Down Expand Up @@ -124,11 +132,15 @@ public void scanningWithDeletes() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(3);
assertThat(result.resultDeleteFiles().value()).isEqualTo(2);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(1);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(30L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(20L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
}

@Test
Expand Down Expand Up @@ -157,7 +169,9 @@ public void scanningWithSkippedDataFiles() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(1);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
Expand All @@ -172,6 +186,7 @@ public void scanningWithSkippedDeleteFiles() throws IOException {
tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter);
table.newAppend().appendFile(FILE_A).appendFile(FILE_D).commit();
table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_D2_DELETES).commit();
table.newRowDelta().addDeletes(FILE_B_DELETES).addDeletes(FILE_C2_DELETES).commit();
TableScan tableScan = table.newScan();

try (CloseableIterable<FileScanTask> fileScanTasks =
Expand All @@ -182,17 +197,19 @@ public void scanningWithSkippedDeleteFiles() throws IOException {
ScanReport scanReport = reporter.lastReport();
assertThat(scanReport).isNotNull();
assertThat(scanReport.tableName()).isEqualTo(tableName);
assertThat(scanReport.snapshotId()).isEqualTo(2L);
assertThat(scanReport.snapshotId()).isEqualTo(3L);
ScanMetricsResult result = scanReport.scanMetrics();
assertThat(result.totalPlanningDuration().totalDuration()).isGreaterThan(Duration.ZERO);
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(1);
assertThat(result.skippedDataFiles().value()).isEqualTo(1);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(1);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(1);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalDataManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(2);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(10L);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ public void extraFields() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);
Assertions.assertThat(
Expand All @@ -191,6 +193,8 @@ public void extraFields() {
+ "\"total-delete-file-size-in-bytes\":{\"unit\":\"bytes\",\"value\":23},"
+ "\"skipped-data-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"extra\": \"value\",\"extra2\":23}"))
.isEqualTo(scanMetricsResult);
}
Expand Down Expand Up @@ -230,6 +234,8 @@ public void roundTripSerde() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);

Expand Down Expand Up @@ -279,6 +285,14 @@ public void roundTripSerde() {
+ " \"skipped-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"scanned-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " }\n"
+ "}";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ public void extraFields() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

String tableName = "roundTripTableName";
Schema projection =
Expand Down Expand Up @@ -110,6 +112,8 @@ public void extraFields() {
+ "\"total-delete-file-size-in-bytes\":{\"unit\":\"bytes\",\"value\":23},"
+ "\"skipped-data-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"extra-metric\":\"extra-val\"},"
+ "\"extra\":\"extraVal\"}"))
.usingRecursiveComparison()
Expand Down Expand Up @@ -168,6 +172,8 @@ public void roundTripSerde() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

String tableName = "roundTripTableName";
Schema projection =
Expand Down Expand Up @@ -242,6 +248,14 @@ public void roundTripSerde() {
+ " \"skipped-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"scanned-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " }\n"
+ " }\n"
+ "}";
Expand Down