Skip to content

Commit 3b68b16

Browse files
committed
Add METRO hash implementation, change the default, fix the web site
1 parent 398775b commit 3b68b16

File tree

8 files changed

+181
-86
lines changed

8 files changed

+181
-86
lines changed

pom.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,18 @@ under the License.
231231
<version>${mockitoVersion}</version>
232232
<scope>test</scope>
233233
</dependency>
234+
<dependency>
235+
<groupId>org.openjdk.jmh</groupId>
236+
<artifactId>jmh-core</artifactId>
237+
<version>1.36</version>
238+
<scope>test</scope>
239+
</dependency>
240+
<dependency>
241+
<groupId>org.openjdk.jmh</groupId>
242+
<artifactId>jmh-generator-annprocess</artifactId>
243+
<version>1.36</version>
244+
<scope>test</scope>
245+
</dependency>
234246
</dependencies>
235247

236248
<build>

src/main/java/org/apache/maven/buildcache/hash/HashFactory.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import java.util.HashMap;
2323
import java.util.Map;
2424

25+
import net.openhft.hashing.LongHashFunction;
26+
2527
/**
2628
* HashFactory
2729
*/
@@ -30,8 +32,10 @@ public enum HashFactory {
3032
SHA256(new SHA("SHA-256")),
3133
SHA384(new SHA("SHA-384")),
3234
SHA512(new SHA("SHA-512")),
33-
XX(new XX()),
34-
XXMM(new XXMM());
35+
XX(new Zah("XX", LongHashFunction.xx(), false)),
36+
XXMM(new Zah("XXMM", LongHashFunction.xx(), true)),
37+
METRO(new Zah("METRO", LongHashFunction.metro(), false)),
38+
METRO_MM(new Zah("METRO+MM", LongHashFunction.metro(), true));
3539

3640
private static final Map<String, HashFactory> LOOKUP = new HashMap<>();
3741

src/main/java/org/apache/maven/buildcache/hash/XXMM.java

Lines changed: 0 additions & 60 deletions
This file was deleted.

src/main/java/org/apache/maven/buildcache/hash/XX.java renamed to src/main/java/org/apache/maven/buildcache/hash/Zah.java

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,43 +20,55 @@
2020

2121
import java.io.IOException;
2222
import java.nio.ByteBuffer;
23+
import java.nio.channels.FileChannel;
2324
import java.nio.file.Files;
2425
import java.nio.file.Path;
2526

2627
import net.openhft.hashing.LongHashFunction;
2728

29+
import static java.nio.channels.FileChannel.MapMode.READ_ONLY;
30+
import static java.nio.file.StandardOpenOption.READ;
31+
2832
/**
29-
* XX
33+
* Zero-Allocation-Hash based factory
3034
*/
31-
public class XX implements Hash.Factory {
35+
public class Zah implements Hash.Factory {
36+
37+
private final String name;
38+
private final LongHashFunction hash;
39+
private final boolean useMemoryMappedBuffers;
3240

33-
static final LongHashFunction INSTANCE = LongHashFunction.xx();
41+
public Zah(String name, LongHashFunction hash, boolean useMemoryMappedBuffers) {
42+
this.name = name;
43+
this.hash = hash;
44+
this.useMemoryMappedBuffers = useMemoryMappedBuffers;
45+
}
3446

3547
@Override
3648
public String getAlgorithm() {
37-
return "XX";
49+
return name;
3850
}
3951

4052
@Override
4153
public Hash.Algorithm algorithm() {
42-
return new XX.Algorithm();
54+
return useMemoryMappedBuffers ? new AlgorithmWithMM() : new Algorithm();
4355
}
4456

4557
@Override
4658
public Hash.Checksum checksum(int count) {
47-
return new XX.Checksum(ByteBuffer.allocate(capacity(count)));
59+
return new Zah.Checksum(ByteBuffer.allocate(capacity(count)));
4860
}
4961

5062
static int capacity(int count) {
5163
// Java 8: Long.BYTES
5264
return count * Long.SIZE / Byte.SIZE;
5365
}
5466

55-
static class Algorithm implements Hash.Algorithm {
67+
class Algorithm implements Hash.Algorithm {
5668

5769
@Override
5870
public byte[] hash(byte[] array) {
59-
return HexUtils.toByteArray(INSTANCE.hashBytes(array));
71+
return HexUtils.toByteArray(hash.hashBytes(array));
6072
}
6173

6274
@Override
@@ -65,7 +77,23 @@ public byte[] hash(Path path) throws IOException {
6577
}
6678
}
6779

68-
static class Checksum implements Hash.Checksum {
80+
class AlgorithmWithMM implements Hash.Algorithm {
81+
82+
@Override
83+
public byte[] hash(byte[] array) {
84+
return HexUtils.toByteArray(hash.hashBytes(array));
85+
}
86+
87+
@Override
88+
public byte[] hash(Path path) throws IOException {
89+
try (FileChannel channel = FileChannel.open(path, READ);
90+
CloseableBuffer buffer = CloseableBuffer.mappedBuffer(channel, READ_ONLY)) {
91+
return HexUtils.toByteArray(hash.hashBytes(buffer.getBuffer()));
92+
}
93+
}
94+
}
95+
96+
class Checksum implements Hash.Checksum {
6997

7098
private final ByteBuffer buffer;
7199

@@ -80,7 +108,7 @@ public void update(byte[] hash) {
80108

81109
@Override
82110
public byte[] digest() {
83-
return HexUtils.toByteArray(INSTANCE.hashBytes(buffer, 0, buffer.position()));
111+
return HexUtils.toByteArray(hash.hashBytes(buffer, 0, buffer.position()));
84112
}
85113
}
86114
}

src/main/mdo/build-cache-config.mdo

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ under the License.
183183
<field>
184184
<name>hashAlgorithm</name>
185185
<type>String</type>
186-
<defaultValue>XX</defaultValue>
187-
<description>One of XX, XXMM, SHA-1, SHA-256, SHA-384, SHA-512</description>
186+
<defaultValue>METRO</defaultValue>
187+
<description>One of METRO, XX, METRO+MM, XXMM, SHA-1, SHA-256, SHA-384, SHA-512</description>
188188
</field>
189189
<field>
190190
<name>validateXml</name>

src/site/markdown/performance.md

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,11 @@ performance, effect of performance optimizations should be carefully measured in
2828

2929
### Hash algorithm selection
3030

31-
By default, cache uses SHA-256 algorithm which is sufficiently fast and provides negligible probability of hash
32-
collisions. In projects with large codebase, performance of hash algorithms becomes more important and in such
33-
scenarios [XX](https://cyan4973.github.io/xxHash/) or XXMM (memory mapped files) hashing algorithms provide better
34-
performance.
31+
By default, the cache uses the METRO algorithm, which is very fast and provides a negligible probability of hash
32+
collisions.
3533

3634
```xml
37-
<hashAlgorithm>XX</hashAlgorithm>
38-
```
39-
40-
or
41-
```xml
42-
43-
<hashAlgorithm>XXMM</hashAlgorithm>
35+
<hashAlgorithm>METRO</hashAlgorithm>
4436
```
4537

4638
### Filter out unnecessary/huge artifacts
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.maven.buildcache.hash;
20+
21+
import java.io.IOException;
22+
import java.nio.file.Files;
23+
import java.nio.file.Path;
24+
import java.nio.file.Paths;
25+
import java.util.List;
26+
import java.util.concurrent.TimeUnit;
27+
import java.util.stream.Collectors;
28+
import java.util.stream.Stream;
29+
30+
import org.openjdk.jmh.annotations.Benchmark;
31+
import org.openjdk.jmh.annotations.BenchmarkMode;
32+
import org.openjdk.jmh.annotations.Level;
33+
import org.openjdk.jmh.annotations.Mode;
34+
import org.openjdk.jmh.annotations.OutputTimeUnit;
35+
import org.openjdk.jmh.annotations.Scope;
36+
import org.openjdk.jmh.annotations.Setup;
37+
import org.openjdk.jmh.annotations.State;
38+
import org.openjdk.jmh.annotations.Warmup;
39+
import org.openjdk.jmh.runner.Runner;
40+
import org.openjdk.jmh.runner.RunnerException;
41+
import org.openjdk.jmh.runner.options.Options;
42+
import org.openjdk.jmh.runner.options.OptionsBuilder;
43+
import org.openjdk.jmh.runner.options.TimeValue;
44+
45+
@BenchmarkMode(Mode.Throughput)
46+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
47+
@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS)
48+
public class PerfTest {
49+
50+
@State(Scope.Benchmark)
51+
public static class HashState {
52+
List<Path> paths;
53+
54+
@Setup(Level.Iteration)
55+
public void setUp() throws IOException {
56+
try (Stream<Path> stream = Files.walk(Paths.get(System.getProperty("user.dir")))) {
57+
paths = stream.filter(p -> p.getFileName().toString().endsWith(".java"))
58+
.collect(Collectors.toList());
59+
}
60+
}
61+
}
62+
63+
String doTest(HashFactory hashFactory, HashState state) throws IOException {
64+
HashAlgorithm hash = hashFactory.createAlgorithm();
65+
StringBuilder sb = new StringBuilder();
66+
for (Path path : state.paths) {
67+
if (sb.length() > 0) {
68+
sb.append("\n");
69+
}
70+
sb.append(hash.hash(path));
71+
}
72+
return sb.toString();
73+
}
74+
75+
@Benchmark
76+
public String SHA1(HashState state) throws IOException {
77+
return doTest(HashFactory.SHA1, state);
78+
}
79+
80+
@Benchmark
81+
public String SHA256(HashState state) throws IOException {
82+
return doTest(HashFactory.SHA256, state);
83+
}
84+
85+
@Benchmark
86+
public String XX(HashState state) throws IOException {
87+
return doTest(HashFactory.XX, state);
88+
}
89+
90+
@Benchmark
91+
public String XXMM(HashState state) throws IOException {
92+
return doTest(HashFactory.XXMM, state);
93+
}
94+
95+
@Benchmark
96+
public String METRO(HashState state) throws IOException {
97+
return doTest(HashFactory.METRO, state);
98+
}
99+
100+
@Benchmark
101+
public String METRO_MM(HashState state) throws IOException {
102+
return doTest(HashFactory.METRO_MM, state);
103+
}
104+
105+
/*
106+
* <p>main.</p>
107+
*
108+
* @param args a {@link java.lang.String} object.
109+
* @throws org.openjdk.jmh.runner.RunnerException if any.
110+
*/
111+
public static void main(String... args) throws RunnerException {
112+
Options opts = new OptionsBuilder()
113+
.measurementIterations(3)
114+
.measurementTime(TimeValue.milliseconds(3000))
115+
.forks(1)
116+
.build();
117+
new Runner(opts).run();
118+
}
119+
}

src/test/java/org/apache/maven/buildcache/xml/CacheConfigImplTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ private void assertDefaults(Map<String, Runnable> overrides) {
161161
asserts.put(
162162
"getGlobalIncludePaths",
163163
() -> assertEquals(Collections.emptyList(), testObject.getGlobalIncludePaths()));
164-
asserts.put("getHashFactory", () -> assertEquals(HashFactory.XX, testObject.getHashFactory()));
164+
asserts.put("getHashFactory", () -> assertEquals(HashFactory.METRO, testObject.getHashFactory()));
165165
asserts.put("getId", () -> assertEquals("cache", testObject.getId()));
166166
asserts.put("getLocalRepositoryLocation", () -> assertNull(testObject.getLocalRepositoryLocation()));
167167
asserts.put(

0 commit comments

Comments
 (0)