Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API,Core: Add scan planning metrics for scanned/skipped delete manifests #5792

Merged
Rate limit · GitHub

Access has been restricted

You have triggered a rate limit.

Please wait a few minutes before you try again;
in some cases this may take up to an hour.

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
API,Core: Add scan planning metrics for scanned/skipped delete manifests
Rate limit · GitHub

Access has been restricted

You have triggered a rate limit.

Please wait a few minutes before you try again;
in some cases this may take up to an hour.

nastra committed Sep 21, 2022
commit e389a01214434afa9b1a899b7751d943572795e1
20 changes: 20 additions & 0 deletions api/src/main/java/org/apache/iceberg/metrics/ScanReport.java
Original file line number Diff line number Diff line change
@@ -131,6 +131,12 @@ interface ScanMetricsResult {
@Nullable
CounterResult skippedDeleteFiles();

@Nullable
CounterResult scannedDeleteManifests();

@Nullable
CounterResult skippedDeleteManifests();

static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
Preconditions.checkArgument(null != scanMetrics, "Invalid scan metrics: null");
return ImmutableScanMetricsResult.builder()
@@ -146,6 +152,8 @@ static ScanMetricsResult fromScanMetrics(ScanMetrics scanMetrics) {
CounterResult.fromCounter(scanMetrics.totalDeleteFileSizeInBytes()))
.skippedDataFiles(CounterResult.fromCounter(scanMetrics.skippedDataFiles()))
.skippedDeleteFiles(CounterResult.fromCounter(scanMetrics.skippedDeleteFiles()))
.scannedDeleteManifests(CounterResult.fromCounter(scanMetrics.scannedDeleteManifests()))
.skippedDeleteManifests(CounterResult.fromCounter(scanMetrics.skippedDeleteManifests()))
.build();
}
}
@@ -157,11 +165,13 @@ abstract class ScanMetrics {
public static final String RESULT_DATA_FILES = "result-data-files";
public static final String RESULT_DELETE_FILES = "result-delete-files";
public static final String SCANNED_DATA_MANIFESTS = "scanned-data-manifests";
public static final String SCANNED_DELETE_MANIFESTS = "scanned-delete-manifests";
public static final String TOTAL_DATA_MANIFESTS = "total-data-manifests";
public static final String TOTAL_DELETE_MANIFESTS = "total-delete-manifests";
public static final String TOTAL_FILE_SIZE_IN_BYTES = "total-file-size-in-bytes";
public static final String TOTAL_DELETE_FILE_SIZE_IN_BYTES = "total-delete-file-size-in-bytes";
public static final String SKIPPED_DATA_MANIFESTS = "skipped-data-manifests";
public static final String SKIPPED_DELETE_MANIFESTS = "skipped-delete-manifests";
public static final String SKIPPED_DATA_FILES = "skipped-data-files";
public static final String SKIPPED_DELETE_FILES = "skipped-delete-files";

@@ -226,6 +236,16 @@ public Counter skippedDeleteFiles() {
return metricsContext().counter(SKIPPED_DELETE_FILES, MetricsContext.Unit.COUNT);
}

@Value.Derived
public Counter scannedDeleteManifests() {
return metricsContext().counter(SCANNED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should make COUNT the default

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created #5912 to do this

}

@Value.Derived
public Counter skippedDeleteManifests() {
return metricsContext().counter(SKIPPED_DELETE_MANIFESTS, MetricsContext.Unit.COUNT);
}

public static ScanMetrics of(MetricsContext metricsContext) {
return ImmutableScanMetrics.builder().metricsContext(metricsContext).build();
}
13 changes: 9 additions & 4 deletions core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
Original file line number Diff line number Diff line change
@@ -529,16 +529,21 @@ private Iterable<CloseableIterable<ManifestEntry<DeleteFile>>> deleteManifestRea
caseSensitive);
});

Iterable<ManifestFile> matchingManifests =
CloseableIterable<ManifestFile> closeableDeleteManifests =
CloseableIterable.withNoopClose(deleteManifests);
CloseableIterable<ManifestFile> matchingManifests =
evalCache == null
? deleteManifests
: Iterables.filter(
deleteManifests,
? closeableDeleteManifests
: CloseableIterable.filter(
scanMetrics.skippedDeleteManifests(),
closeableDeleteManifests,
manifest ->
manifest.content() == ManifestContent.DELETES
&& (manifest.hasAddedFiles() || manifest.hasExistingFiles())
&& evalCache.get(manifest.partitionSpecId()).eval(manifest));

matchingManifests =
CloseableIterable.count(scanMetrics.scannedDeleteManifests(), matchingManifests);
return Iterables.transform(
matchingManifests,
manifest ->
Original file line number Diff line number Diff line change
@@ -37,6 +37,7 @@ static String toJson(ScanMetricsResult metrics, boolean pretty) {
return JsonUtil.generate(gen -> toJson(metrics, gen), pretty);
}

@SuppressWarnings("checkstyle:CyclomaticComplexity")
static void toJson(ScanMetricsResult metrics, JsonGenerator gen) throws IOException {
Preconditions.checkArgument(null != metrics, "Invalid scan metrics: null");

@@ -97,6 +98,16 @@ static void toJson(ScanMetricsResult metrics, JsonGenerator gen) throws IOExcept
CounterResultParser.toJson(metrics.skippedDeleteFiles(), gen);
}

if (null != metrics.scannedDeleteManifests()) {
gen.writeFieldName(ScanMetrics.SCANNED_DELETE_MANIFESTS);
CounterResultParser.toJson(metrics.scannedDeleteManifests(), gen);
}

if (null != metrics.skippedDeleteManifests()) {
gen.writeFieldName(ScanMetrics.SKIPPED_DELETE_MANIFESTS);
CounterResultParser.toJson(metrics.skippedDeleteManifests(), gen);
}

gen.writeEndObject();
}

@@ -127,6 +138,10 @@ static ScanMetricsResult fromJson(JsonNode json) {
CounterResultParser.fromJson(ScanMetrics.TOTAL_DELETE_FILE_SIZE_IN_BYTES, json))
.skippedDataFiles(CounterResultParser.fromJson(ScanMetrics.SKIPPED_DATA_FILES, json))
.skippedDeleteFiles(CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_FILES, json))
.scannedDeleteManifests(
CounterResultParser.fromJson(ScanMetrics.SCANNED_DELETE_MANIFESTS, json))
.skippedDeleteManifests(
CounterResultParser.fromJson(ScanMetrics.SKIPPED_DELETE_MANIFESTS, json))
.build();
}
}
Original file line number Diff line number Diff line change
@@ -67,11 +67,15 @@ public void scanningWithMultipleDataManifests() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(3);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(2);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(30L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);

// we should hit only a single data manifest and only a single data file
try (CloseableIterable<FileScanTask> fileScanTasks =
@@ -88,11 +92,15 @@ public void scanningWithMultipleDataManifests() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(1);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(0L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
}

@Test
@@ -124,11 +132,15 @@ public void scanningWithDeletes() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(3);
assertThat(result.resultDeleteFiles().value()).isEqualTo(2);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(1);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(30L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(20L);
assertThat(result.skippedDataFiles().value()).isEqualTo(0);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
}

@Test
@@ -157,7 +169,9 @@ public void scanningWithSkippedDataFiles() throws IOException {
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(0);
assertThat(result.skippedDataManifests().value()).isEqualTo(1);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalDataManifests().value()).isEqualTo(2);
assertThat(result.totalDeleteManifests().value()).isEqualTo(0);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
@@ -172,6 +186,7 @@ public void scanningWithSkippedDeleteFiles() throws IOException {
tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter);
table.newAppend().appendFile(FILE_A).appendFile(FILE_D).commit();
table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_D2_DELETES).commit();
table.newRowDelta().addDeletes(FILE_B_DELETES).addDeletes(FILE_C2_DELETES).commit();
TableScan tableScan = table.newScan();

try (CloseableIterable<FileScanTask> fileScanTasks =
@@ -182,17 +197,19 @@ public void scanningWithSkippedDeleteFiles() throws IOException {
ScanReport scanReport = reporter.lastReport();
assertThat(scanReport).isNotNull();
assertThat(scanReport.tableName()).isEqualTo(tableName);
assertThat(scanReport.snapshotId()).isEqualTo(2L);
assertThat(scanReport.snapshotId()).isEqualTo(3L);
ScanMetricsResult result = scanReport.scanMetrics();
assertThat(result.totalPlanningDuration().totalDuration()).isGreaterThan(Duration.ZERO);
assertThat(result.resultDataFiles().value()).isEqualTo(1);
assertThat(result.resultDeleteFiles().value()).isEqualTo(1);
assertThat(result.skippedDataFiles().value()).isEqualTo(1);
assertThat(result.skippedDeleteFiles().value()).isEqualTo(1);
assertThat(result.scannedDataManifests().value()).isEqualTo(1);
assertThat(result.scannedDeleteManifests().value()).isEqualTo(1);
assertThat(result.skippedDataManifests().value()).isEqualTo(0);
assertThat(result.skippedDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalDataManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(1);
assertThat(result.totalDeleteManifests().value()).isEqualTo(2);
assertThat(result.totalFileSizeInBytes().value()).isEqualTo(10L);
assertThat(result.totalDeleteFileSizeInBytes().value()).isEqualTo(10L);
}
Original file line number Diff line number Diff line change
@@ -176,6 +176,8 @@ public void extraFields() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);
Assertions.assertThat(
@@ -191,6 +193,8 @@ public void extraFields() {
+ "\"total-delete-file-size-in-bytes\":{\"unit\":\"bytes\",\"value\":23},"
+ "\"skipped-data-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"extra\": \"value\",\"extra2\":23}"))
.isEqualTo(scanMetricsResult);
}
@@ -230,6 +234,8 @@ public void roundTripSerde() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

ScanMetricsResult scanMetricsResult = ScanMetricsResult.fromScanMetrics(scanMetrics);

@@ -279,6 +285,14 @@ public void roundTripSerde() {
+ " \"skipped-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"scanned-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " }\n"
+ "}";

Original file line number Diff line number Diff line change
@@ -82,6 +82,8 @@ public void extraFields() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

String tableName = "roundTripTableName";
Schema projection =
@@ -110,6 +112,8 @@ public void extraFields() {
+ "\"total-delete-file-size-in-bytes\":{\"unit\":\"bytes\",\"value\":23},"
+ "\"skipped-data-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-files\":{\"unit\":\"count\",\"value\":3},"
+ "\"scanned-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"skipped-delete-manifests\":{\"unit\":\"count\",\"value\":3},"
+ "\"extra-metric\":\"extra-val\"},"
+ "\"extra\":\"extraVal\"}"))
.usingRecursiveComparison()
@@ -168,6 +172,8 @@ public void roundTripSerde() {
scanMetrics.totalDeleteFileSizeInBytes().increment(23L);
scanMetrics.skippedDataFiles().increment(3L);
scanMetrics.skippedDeleteFiles().increment(3L);
scanMetrics.scannedDeleteManifests().increment(3L);
scanMetrics.skippedDeleteManifests().increment(3L);

String tableName = "roundTripTableName";
Schema projection =
@@ -242,6 +248,14 @@ public void roundTripSerde() {
+ " \"skipped-delete-files\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"scanned-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " },\n"
+ " \"skipped-delete-manifests\" : {\n"
+ " \"unit\" : \"count\",\n"
+ " \"value\" : 3\n"
+ " }\n"
+ " }\n"
+ "}";