Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8342040: Further improve entry lookup performance for multi-release JARs #21489

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion src/java.base/share/classes/java/util/jar/JarFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ private String getBasename(String name) {

private JarEntry getVersionedEntry(String name, JarEntry defaultEntry) {
if (!name.startsWith(META_INF)) {
int[] versions = JUZFA.getMetaInfVersions(this);
int[] versions = JUZFA.getMetaInfVersions(this, name);
if (BASE_VERSION_FEATURE < versionFeature && versions.length > 0) {
// search for versioned entry
for (int i = versions.length - 1; i >= 0; i--) {
Expand Down
50 changes: 32 additions & 18 deletions src/java.base/share/classes/java/util/zip/ZipFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
Expand All @@ -64,6 +65,7 @@
import jdk.internal.access.JavaUtilJarAccess;
import jdk.internal.access.SharedSecrets;
import jdk.internal.util.ArraysSupport;
import jdk.internal.util.DecimalDigits;
import jdk.internal.util.OperatingSystem;
import jdk.internal.perf.PerfCounter;
import jdk.internal.ref.CleanerFactory;
Expand Down Expand Up @@ -1096,10 +1098,10 @@ private String getManifestName(boolean onlyIfSignatureRelatedFiles) {
* optimization when looking up potentially versioned entries.
* Returns an empty array if no versioned entries exist.
*/
private int[] getMetaInfVersions() {
private int[] getMetaInfVersions(String name) {
synchronized (this) {
ensureOpen();
return res.zsrc.metaVersions;
return res.zsrc.metaVersions.getOrDefault(name, Source.EMPTY_META_VERSIONS);
}
}

Expand Down Expand Up @@ -1139,8 +1141,8 @@ public String getManifestName(JarFile jar, boolean onlyIfHasSignatureRelatedFile
return ((ZipFile)jar).getManifestName(onlyIfHasSignatureRelatedFiles);
}
@Override
public int[] getMetaInfVersions(JarFile jar) {
return ((ZipFile)jar).getMetaInfVersions();
public int[] getMetaInfVersions(JarFile jar, String name) {
return ((ZipFile)jar).getMetaInfVersions(name);
}
@Override
public Enumeration<JarEntry> entries(ZipFile zip) {
Expand Down Expand Up @@ -1175,6 +1177,8 @@ private static class Source {
private static final JavaUtilJarAccess JUJA = SharedSecrets.javaUtilJarAccess();
// "META-INF/".length()
private static final int META_INF_LEN = 9;
// "META-INF/versions//".length()
private static final int META_INF_VERSIONS_LEN = 19;
private static final int[] EMPTY_META_VERSIONS = new int[0];
// CEN size is limited to the maximum array size in the JDK
private static final int MAX_CEN_SIZE = ArraysSupport.SOFT_MAX_ARRAY_LENGTH;
Expand All @@ -1192,7 +1196,7 @@ private static class Source {
private int manifestPos = -1; // position of the META-INF/MANIFEST.MF, if exists
private int manifestNum = 0; // number of META-INF/MANIFEST.MF, case insensitive
private int[] signatureMetaNames; // positions of signature related entries, if such exist
private int[] metaVersions; // list of unique versions found in META-INF/versions/
private Map<String, int[]> metaVersions; // Set of versions found in META-INF/versions/, by entry name
private final boolean startsWithLoc; // true, if ZIP file starts with LOCSIG (usually true)

// A Hashmap for all entries.
Expand Down Expand Up @@ -1574,7 +1578,7 @@ private void close() throws IOException {
manifestPos = -1;
manifestNum = 0;
signatureMetaNames = null;
metaVersions = EMPTY_META_VERSIONS;
metaVersions = null;
}

private static final int BUF_SIZE = 8192;
Expand Down Expand Up @@ -1756,8 +1760,9 @@ private void initCEN(int knownTotal) throws IOException {

// list for all meta entries
ArrayList<Integer> signatureNames = null;
// Set of all version numbers seen in META-INF/versions/
Set<Integer> metaVersionsSet = null;
// Map entry name to the set of versions seen in
// META-INF/versions/{version}/{name}
Map<String, Set<Integer>> metaVersionsMap = null;

// Iterate through the entries in the central directory
int idx = 0; // Index into the entries array
Expand Down Expand Up @@ -1796,9 +1801,13 @@ private void initCEN(int knownTotal) throws IOException {
// performance in multi-release jar files
int version = getMetaVersion(entryPos + META_INF_LEN, nlen - META_INF_LEN);
if (version > 0) {
if (metaVersionsSet == null)
metaVersionsSet = new TreeSet<>();
metaVersionsSet.add(version);
int versionPrefix = META_INF_VERSIONS_LEN + DecimalDigits.stringSize(version);
// Extract name from "META-INF/versions/{version)/{name}
String name = zipCoderForPos(pos).toString(cen, entryPos + versionPrefix, nlen - versionPrefix);
// Add version for name
if (metaVersionsMap == null)
metaVersionsMap = new HashMap<>();
metaVersionsMap.computeIfAbsent(name, n -> new TreeSet<>()).add(version);
}
}
}
Expand All @@ -1816,14 +1825,19 @@ private void initCEN(int knownTotal) throws IOException {
signatureMetaNames[j] = signatureNames.get(j);
}
}
if (metaVersionsSet != null) {
metaVersions = new int[metaVersionsSet.size()];
int c = 0;
for (Integer version : metaVersionsSet) {
metaVersions[c++] = version;
if (metaVersionsMap != null) {
metaVersions = new HashMap<>();
for (var entry : metaVersionsMap.entrySet()) {
// Convert TreeSet<Integer> to int[] for performance
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if you just require order upon final freezing, you can just use a HashSet or ArrayList, and perform the sorting and deduplication when you compile to an int array.

Copy link
Contributor Author

@eirbjo eirbjo Oct 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we trust that versioned entries are rare, this should not matter much either way. But I pushed a commit which uses HashSet and Arrays::sort on freezing instead. WDYT?

Given that most versioned entries will only have a single version, we may save some memory and speed by special-casing for sets with one or two versions:

switch (metaVersionsMap.get(name)) {
    case null -> {
        // Special-case for set of size 1
        metaVersionsMap.put(name, Set.of(version));
    }
    case Set<Integer> versions when versions.size() == 1 -> {
        // Special-case for set of size 2
        metaVersionsMap.put(name, Set.of(version, versions.iterator().next()));
    }
    case Set<Integer> versions when versions.size() == 2 -> {
        // Now we convert to HashSet
        HashSet<Integer> set = new HashSet<>(versions);
        set.add(version);
        metaVersionsMap.put(name, set);
    }
    case HashSet<Integer> versions -> {
        // Just add
        versions.add(version);
    }
    default -> throw new AssertionError("Illegal state");
}

But again, versioned entries are rare, so perhaps better to keep it simple. Does @cl4es have thoughts about this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like simple.

Mixing Set.of and HashSet means a lot of semantic fu-fu (throwing/not throwing on nulls) and risks being suboptimal due making some call sites megamorphic.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, looks good. And I agree with Claes' evaluation that we should not complicate this construction further, especially that the result is quickly garbage collected when we compile into the string to int array map.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also just use Map<String, int[]>. This would allow us to skip the whole Set<Integer> to int[] transition step, and the temporary HashMap is no longer needed.

Deduplication could occur during registration:

metaVersions.merge(name, new int[] {version}, (current, val) -> {
    // Check duplicates
    for (int v : current) {
        if (v == version) {
            return current;
        }
    }
    // Add version
    int[] merged = Arrays.copyOf(current, current.length + 1);
    merged[merged.length - 1] = version;
    return merged;
});

while the postprocessing step would simply sort each array:

for (int[] versions : metaVersions.values()) {
    // JarFile::getVersionedEntry expects sorted versions
    Arrays.sort(versions);
}

This performs ~10% better on a version-heavy ZipFileOpen, and I think the code is reasonably simple compared to the current state of the PR. Thoughts?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect remaining regression is mostly from the added String creation/processing in initCEN.

Yes, there is a cost with the String construction when parsing the name out of META-INF/versions/{name}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a few possible strategies for avoiding that additional parse, since the effect we're getting at here is to have a quick filter to avoid pointless lookups and not necessarily an exact mapping.

One is to store the checkedHash rather than the full String. This gets openCloseZipFile down to ~910000 ns/op. (checkedHash very hot in profiles). There's still a chance for redundant lookups on hash collisions, but this should be rare.

Another is to store a BitSet per name length. This gets ZipFileOpen down to baseline level (~670000 ns/op), but increases chance of having to do redundant lookups a lot.

Both also improves footprint (not keeping each versioned entry String in memory would be nice).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One is to store the checkedHash rather than the full String. This gets openCloseZipFile down to ~910000 ns/op. (checkedHash very hot in profiles). There's still a chance for redundant lookups on hash collisions, but this should be rare.

Seems like a resonable trade-off. Could you take a look at the latest 771488e and see if that represents your suggestion here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, 771488e matches my quick experiment (sans some cleanups you've made). I agree this variant makes for a reasonable trade-off.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe there's some benefit in detecting "META-INF/versions/" in the checkAndAddEntry method and then calculate the two different hash values back to back. Hash calculations are fast on modern hardware with vector support, but the memory accesses might not be so we might get a general win from re-arranging some of this stuff. I think this is more suitable for a follow-up experiment, though.

int[] versions = new int[entry.getValue().size()];
int c = 0;
for (Integer i : entry.getValue()) {
versions[c++] = i.intValue();
}
metaVersions.put(entry.getKey(), versions);
}
} else {
metaVersions = EMPTY_META_VERSIONS;
metaVersions = Map.of();
}
if (pos != cen.length) {
zerror("invalid CEN header (bad header size)");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public interface JavaUtilZipFileAccess {
public List<String> getManifestAndSignatureRelatedFiles(JarFile zip);
public String getManifestName(JarFile zip, boolean onlyIfSignatureRelatedFiles);
public int getManifestNum(JarFile zip);
public int[] getMetaInfVersions(JarFile zip);
public int[] getMetaInfVersions(JarFile zip, String name);
public Enumeration<JarEntry> entries(ZipFile zip);
public Stream<JarEntry> stream(ZipFile zip);
public Stream<String> entryNameStream(ZipFile zip);
Expand Down
23 changes: 20 additions & 3 deletions test/micro/org/openjdk/bench/java/util/jar/JarFileGetEntry.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -31,9 +31,12 @@
import java.nio.file.Files;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.jar.Attributes;
import java.util.jar.JarFile;
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

/**
* Simple benchmark measuring cost of looking up entries in a jar file.
Expand Down Expand Up @@ -71,6 +74,9 @@ public class JarFileGetEntry {
@Param({"512", "1024"})
private int size;

@Param({"false", "true"})
private boolean mr;

public JarFile jarFile;
public String[] entryNames;
public String[] missingEntryNames;
Expand All @@ -91,9 +97,20 @@ public void beforeRun() throws IOException {
entryNames = new String[size];
missingEntryNames = new String[size];

Manifest man = new Manifest();
man.getMainAttributes().put(Attributes.Name.MANIFEST_VERSION, "1.0");
if (mr) {
man.getMainAttributes().put(Attributes.Name.MULTI_RELEASE, "true");
}
try (FileOutputStream fos = new FileOutputStream(tempFile);
JarOutputStream jos = new JarOutputStream(fos)) {
JarOutputStream jos = new JarOutputStream(fos, man)) {

if (mr) {
// Add a few versioned entries
jos.putNextEntry(new ZipEntry("META-INF/versions/9/module-info.class"));
jos.putNextEntry(new ZipEntry("META-INF/versions/17/foo/library/Library.class"));
jos.putNextEntry(new ZipEntry("META-INF/versions/21/foo/library/Library.class"));
}
Random random = new Random(4711);
for (int i = 0; i < size; i++) {
String ename = "entry-" + (random.nextInt(90000) + 10000) + "-" + i;
Expand All @@ -107,7 +124,7 @@ public void beforeRun() throws IOException {
}
}

jarFile = new JarFile(tempFile);
jarFile = new JarFile(tempFile, true, ZipFile.OPEN_READ, mr ? JarFile.runtimeVersion() : JarFile.baseVersion());
}

@Benchmark
Expand Down