Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit bc4854b

Browse files
committed
Guard against overflow when growing BytesToBytesMap
1 parent f5feadf commit bc4854b

File tree

3 files changed

+55
-8
lines changed

3 files changed

+55
-8
lines changed

unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,8 @@
3838
* This is backed by a power-of-2-sized hash table, using quadratic probing with triangular numbers,
3939
* which is guaranteed to exhaust the space.
4040
* <p>
41-
* The map can support up to 2^31 keys because we use 32 bit MurmurHash. If the key cardinality is
42-
* higher than this, you should probably be using sorting instead of hashing for better cache
43-
* locality.
41+
* The map can support up to 2^30 keys. If the key cardinality is higher than this, you should
42+
* probably be using sorting instead of hashing for better cache locality.
4443
* <p>
4544
* This class is not thread safe.
4645
*/
@@ -81,6 +80,12 @@ public final class BytesToBytesMap {
8180
*/
8281
private static final long PAGE_SIZE_BYTES = 1L << 26; // 64 megabytes
8382

83+
/**
84+
* The maximum number of keys that BytesToBytesMap supports.
85+
*/
86+
@VisibleForTesting
87+
static final int MAX_CAPACITY = (1 << 30);
88+
8489
// This choice of page table size and page size means that we can address up to 500 gigabytes
8590
// of memory.
8691

@@ -150,6 +155,13 @@ public BytesToBytesMap(
150155
this.loadFactor = loadFactor;
151156
this.loc = new Location();
152157
this.enablePerfMetrics = enablePerfMetrics;
158+
if (initialCapacity <= 0) {
159+
throw new IllegalArgumentException("Initial capacity must be greater than 0");
160+
}
161+
if (initialCapacity > MAX_CAPACITY) {
162+
throw new IllegalArgumentException(
163+
"Initial capacity " + initialCapacity + " exceeds maximum capacity of " + MAX_CAPACITY);
164+
}
153165
allocate(initialCapacity);
154166
}
155167

@@ -417,6 +429,9 @@ public void putNewKey(
417429
isDefined = true;
418430
assert (keyLengthBytes % 8 == 0);
419431
assert (valueLengthBytes % 8 == 0);
432+
if (size == MAX_CAPACITY) {
433+
throw new IllegalStateException("BytesToBytesMap has reached maximum capacity");
434+
}
420435
// Here, we'll copy the data into our data pages. Because we only store a relative offset from
421436
// the key address instead of storing the absolute address of the value, the key and value
422437
// must be stored in the same memory page.
@@ -468,7 +483,7 @@ public void putNewKey(
468483
longArray.set(pos * 2 + 1, keyHashcode);
469484
updateAddressesAndSizes(storedKeyAddress);
470485
isDefined = true;
471-
if (size > growthThreshold) {
486+
if (size > growthThreshold && size < MAX_CAPACITY) {
472487
growAndRehash();
473488
}
474489
}
@@ -481,7 +496,9 @@ public void putNewKey(
481496
* @param capacity the new map capacity
482497
*/
483498
private void allocate(int capacity) {
484-
capacity = Math.max((int) Math.min(Integer.MAX_VALUE, nextPowerOf2(capacity)), 64);
499+
assert (capacity >= 0);
500+
// The capacity needs to be divisible by 64 so that our bit set can be sized properly
501+
capacity = Math.max((int) Math.min(MAX_CAPACITY, nextPowerOf2(capacity)), 64);
485502
longArray = new LongArray(memoryManager.allocate(capacity * 8 * 2));
486503
bitset = new BitSet(MemoryBlock.fromLongArray(new long[capacity / 64]));
487504

@@ -556,7 +573,8 @@ int getNumDataPages() {
556573
/**
557574
* Grows the size of the hash table and re-hash everything.
558575
*/
559-
private void growAndRehash() {
576+
@VisibleForTesting
577+
void growAndRehash() {
560578
long resizeStartTime = -1;
561579
if (enablePerfMetrics) {
562580
resizeStartTime = System.nanoTime();
@@ -567,7 +585,7 @@ private void growAndRehash() {
567585
final int oldCapacity = (int) oldBitSet.capacity();
568586

569587
// Allocate the new data structures
570-
allocate(Math.min(Integer.MAX_VALUE, growthStrategy.nextCapacity(oldCapacity)));
588+
allocate(Math.min(growthStrategy.nextCapacity(oldCapacity), MAX_CAPACITY));
571589

572590
// Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
573591
for (int pos = oldBitSet.nextSetBit(0); pos >= 0; pos = oldBitSet.nextSetBit(pos + 1)) {

unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ public interface HashMapGrowthStrategy {
3232
class Doubling implements HashMapGrowthStrategy {
3333
@Override
3434
public int nextCapacity(int currentCapacity) {
35-
return currentCapacity * 2;
35+
assert (currentCapacity > 0);
36+
// Guard against overflow
37+
return (currentCapacity * 2 > 0) ? (currentCapacity * 2) : Integer.MAX_VALUE;
3638
}
3739
}
3840

unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,4 +335,31 @@ public void randomizedStressTest() {
335335
map.free();
336336
}
337337
}
338+
339+
@Test
340+
public void initialCapacityBoundsChecking() {
341+
try {
342+
new BytesToBytesMap(memoryManager, 0);
343+
Assert.fail("Expected IllegalArgumentException to be thrown");
344+
} catch (IllegalArgumentException e) {
345+
// expected exception
346+
}
347+
348+
try {
349+
new BytesToBytesMap(memoryManager, BytesToBytesMap.MAX_CAPACITY + 1);
350+
Assert.fail("Expected IllegalArgumentException to be thrown");
351+
} catch (IllegalArgumentException e) {
352+
// expected exception
353+
}
354+
355+
// Can allocate _at_ the max capacity
356+
new BytesToBytesMap(memoryManager, BytesToBytesMap.MAX_CAPACITY);
357+
}
358+
359+
@Test
360+
public void resizingLargeMap() {
361+
// As long as a map's capacity is below the max, we should be able to resize up to the max
362+
BytesToBytesMap map = new BytesToBytesMap(memoryManager, BytesToBytesMap.MAX_CAPACITY - 64);
363+
map.growAndRehash();
364+
}
338365
}

0 commit comments

Comments
 (0)