Skip to content

Commit

Permalink
API,Core: Add scan planning metrics for scanned/skipped delete manifests
Browse files Browse the repository at this point in the history
  • Loading branch information
nastra committed Sep 21, 2022
1 parent 15b3e2f commit 3c2411d
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 7 deletions.
20 changes: 20 additions & 0 deletions api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ interface ScanMetricsResult {
@Nullable
CounterResult skippedDeleteFiles();

@Nullable
CounterResult scannedDeleteManifests();

@Nullable
CounterResult skippedDeleteManifests();

static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
Preconditions.checkArgument(null != scanMetrics, "Invalid scan metrics: null");
return ImmutableScanMetricsResult.builder()
Expand All @@ -146,6 +152,8 @@ static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
CounterResult.fromCounter(scanMetrics.totalDeleteFileSizeInBytes()))
.skippedDataFiles(CounterResult.fromCounter(scanMetrics.skippedDataFiles()))
.skippedDeleteFiles(CounterResult.fromCounter(scanMetrics.skippedDeleteFiles()))
.scannedDeleteManifests(CounterResult.fromCounter(scanMetrics.scannedDeleteManifests()))
.skippedDeleteManifests(CounterResult.fromCounter(scanMetrics.skippedDeleteManifests()))
.build();
}
}
Expand All @@ -157,11 +165,13 @@ abstract class ScanMetrics {
public static final String RESULT_DATA_FILES = "result-data-files";
public static final String RESULT_DELETE_FILES = "result-delete-files";
public static final String SCANNED_DATA_MANIFESTS = "scanned-data-manifests";
public static final String SCANNED_DELETE_MANIFESTS = "scanned-delete-manifests";
public static final String TOTAL_DATA_MANIFESTS = "total-data-manifests";
public static final String TOTAL_DELETE_MANIFESTS = "total-delete-manifests";
public static final String TOTAL_FILE_SIZE_IN_BYTES = "total-file-size-in-bytes";
public static final String TOTAL_DELETE_FILE_SIZE_IN_BYTES = "total-delete-file-size-in-bytes";
public static final String SKIPPED_DATA_MANIFESTS = "skipped-data-manifests";
public static final String SKIPPED_DELETE_MANIFESTS = "skipped-delete-manifests";
public static final String SKIPPED_DATA_FILES = "skipped-data-files";
public static final String SKIPPED_DELETE_FILES = "skipped-delete-files";

Expand Down Expand Up @@ -226,6 +236,16 @@ public Counter skippedDeleteFiles() {
return metricsContext().counter(SKIPPED_DELETE_FILES, MetricsContext.Unit.COUNT);
}

@Value.Derived
public Counter scannedDeleteManifests() {
return metricsContext().counter(SCANNED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
}

@Value.Derived
public Counter skippedDeleteManifests() {
return metricsContext().counter(SKIPPED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
}

public static ScanMetrics of(MetricsContext metricsContext) {
return ImmutableScanMetrics.builder().metricsContext(metricsContext).build();
}
Expand Down
13 changes: 9 additions & 4 deletions core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -529,16 +529,21 @@ private Iterable<CloseableIterable<ManifestEntry<DeleteFile>>> deleteManifestRea
caseSensitive);
});

Iterable<ManifestFile> matchingManifests =
CloseableIterable<ManifestFile> closeableDeleteManifests =
CloseableIterable.withNoopClose(deleteManifests);
CloseableIterable<ManifestFile> matchingManifests =
evalCache == null
? deleteManifests
: Iterables.filter(
deleteManifests,
? closeableDeleteManifests
: CloseableIterable.filter(
scanMetrics.skippedDeleteManifests(),
closeableDeleteManifests,
manifest ->
manifest.content() == ManifestContent.DELETES
&& (manifest.hasAddedFiles() || manifest.hasExistingFiles())
&& evalCache.get(manifest.partitionSpecId()).eval(manifest));

matchingManifests =
CloseableIterable.count(scanMetrics.scannedDeleteManifests(), matchingManifests);
return Iterables.transform(
matchingManifests,
manifest ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ static String toJson(ScanMetricsResult metrics, boolean pretty) {
return JsonUtil.generate(gen -> toJson(metrics, gen), pretty);
}

@SuppressWarnings("checkstyle:CyclomaticComplexity")
static void toJson(ScanMetricsResult metrics, JsonGenerator gen) throws IOException {
Preconditions.checkArgument(null != metrics, "Invalid scan metrics: null");

Expand Down Expand Up @@ -97,6 +98,16 @@ static void toJson(ScanMetricsResult metrics, JsonGenerator gen) throws IOExcept
CounterResultParser.toJson(metrics.skippedDeleteFiles(), gen);
}

if (null != metrics.scannedDeleteManifests()) {
gen.writeFieldName(ScanMetrics.SCANNED_DELETE_MANIFESTS);
CounterResultParser.toJson(metrics.scannedDeleteManifests(), gen);
}

if (null != metrics.skippedDeleteManifests()) {
gen.writeFieldName(ScanMetrics.SKIPPED_DELETE_MANIFESTS);
CounterResultParser.toJson(metrics.skippedDeleteManifests(), gen);
}

gen.writeEndObject();
}

Expand Down Expand Up @@ -127,6 +138,10 @@ static ScanMetricsResult fromJson(JsonNode json) {
CounterResultParser.fromJson(ScanMetrics.TOTAL_DELETE_FILE_SIZE_IN_BYTES, json))
.skippedDataFiles(CounterResultParser.fromJson(ScanMetrics.SKIPPED_DATA_FILES, json))
.skippedDeleteFiles(CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_FILES, json))
.scannedDeleteManifests(
CounterResultParser.fromJson(ScanMetrics.SCANNED_DELETE_MANIFESTS, json))
.skippedDeleteManifests(
CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_MANIFESTS, json))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,21 @@ public void scanningWithMultipleDataManifests() throws IOException {
assertThat(scanReport).isNotNull();

assertThat(scanReport.tableName()).isEqualTo(tableName);
assertThat(scanReport.snapshotId()).isEqualTo(2L);
ScanMetricsResult result = scanReport.scanMetrics();
assertThat(scanReport.snapshotId()).isEqualTo(2L);
assertThat(result.totalPlanningDuration().totalDuration()).isGreaterThan(Duration.ZERO);
assertThat(result.resultDataFiles().value()).isEqualTo(3);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(2);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(30L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);

// we should hit only a single data manifest and only a single data file
try (CloseableIterable<FileScanTask> fileScanTasks =
Expand All @@ -93,6 +97,8 @@ public void scanningWithMultipleDataManifests() throws IOException {
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
}

@Test
Expand Down Expand Up @@ -124,11 +130,15 @@ public void scanningWithDeletes() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(3);
assertThat(result.resultDeleteFiles().value()).isEqualTo(2);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(1);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(30L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(20L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
}

@Test
Expand Down Expand Up @@ -157,7 +167,9 @@ public void scanningWithSkippedDataFiles() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(1);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
Expand All @@ -172,6 +184,7 @@ public void scanningWithSkippedDeleteFiles() throws IOException {
tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter);
table.newAppend().appendFile(FILE_A).appendFile(FILE_D).commit();
table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_D2_DELETES).commit();
table.newRowDelta().addDeletes(FILE_B_DELETES).addDeletes(FILE_C2_DELETES).commit();
TableScan tableScan = table.newScan();

try (CloseableIterable<FileScanTask> fileScanTasks =
Expand All @@ -182,17 +195,19 @@ public void scanningWithSkippedDeleteFiles() throws IOException {
ScanReport scanReport = reporter.lastReport();
assertThat(scanReport).isNotNull();
assertThat(scanReport.tableName()).isEqualTo(tableName);
assertThat(scanReport.snapshotId()).isEqualTo(2L);
assertThat(scanReport.snapshotId()).isEqualTo(3L);
ScanMetricsResult result = scanReport.scanMetrics();
assertThat(result.totalPlanningDuration().totalDuration()).isGreaterThan(Duration.ZERO);
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(1);
assertThat(result.skippedDataFiles().value()).isEqualTo(1);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(1);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(1);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalDataManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(2);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(10L);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ public void extraFields() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);
Assertions.assertThat(
Expand All @@ -191,6 +193,8 @@ public void extraFields() {
+ "\"total-delete-file-size-in-bytes\":{\"unit\":\"bytes\",\"value\":23},"
+ "\"skipped-data-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"extra\": \"value\",\"extra2\":23}"))
.isEqualTo(scanMetricsResult);
}
Expand Down Expand Up @@ -230,6 +234,8 @@ public void roundTripSerde() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);

Expand Down Expand Up @@ -279,6 +285,14 @@ public void roundTripSerde() {
+ " \"skipped-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"scanned-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " }\n"
+ "}";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ public void extraFields() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

String tableName = "roundTripTableName";
Schema projection =
Expand Down Expand Up @@ -110,6 +112,8 @@ public void extraFields() {
+ "\"total-delete-file-size-in-bytes\":{\"unit\":\"bytes\",\"value\":23},"
+ "\"skipped-data-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"extra-metric\":\"extra-val\"},"
+ "\"extra\":\"extraVal\"}"))
.usingRecursiveComparison()
Expand Down Expand Up @@ -168,6 +172,8 @@ public void roundTripSerde() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

String tableName = "roundTripTableName";
Schema projection =
Expand Down Expand Up @@ -242,6 +248,14 @@ public void roundTripSerde() {
+ " \"skipped-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"scanned-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " }\n"
+ " }\n"
+ "}";
Expand Down

0 comments on commit 3c2411d

Please sign in to comment.