38
38
* This is backed by a power-of-2-sized hash table, using quadratic probing with triangular numbers,
39
39
* which is guaranteed to exhaust the space.
40
40
* <p>
41
- * The map can support up to 2^31 keys because we use 32 bit MurmurHash. If the key cardinality is
42
- * higher than this, you should probably be using sorting instead of hashing for better cache
43
- * locality.
41
+ * The map can support up to 2^30 keys. If the key cardinality is higher than this, you should
42
+ * probably be using sorting instead of hashing for better cache locality.
44
43
* <p>
45
44
* This class is not thread safe.
46
45
*/
@@ -81,6 +80,12 @@ public final class BytesToBytesMap {
81
80
*/
82
81
private static final long PAGE_SIZE_BYTES = 1L << 26 ; // 64 megabytes
83
82
83
+ /**
84
+ * The maximum number of keys (and hash table slots) that BytesToBytesMap supports.
85
+ */
86
+ @ VisibleForTesting
87
+ static final int MAX_CAPACITY = (1 << 30 );
88
+
84
89
// This choice of page table size and page size means that we can address up to 500 gigabytes
85
90
// of memory.
86
91
@@ -150,6 +155,13 @@ public BytesToBytesMap(
150
155
this .loadFactor = loadFactor ;
151
156
this .loc = new Location ();
152
157
this .enablePerfMetrics = enablePerfMetrics ;
158
+ if (initialCapacity <= 0 ) {
159
+ throw new IllegalArgumentException ("Initial capacity must be greater than 0" );
160
+ }
161
+ if (initialCapacity > MAX_CAPACITY ) {
162
+ throw new IllegalArgumentException (
163
+ "Initial capacity " + initialCapacity + " exceeds maximum capacity of " + MAX_CAPACITY );
164
+ }
153
165
allocate (initialCapacity );
154
166
}
155
167
@@ -417,6 +429,9 @@ public void putNewKey(
417
429
isDefined = true ;
418
430
assert (keyLengthBytes % 8 == 0 );
419
431
assert (valueLengthBytes % 8 == 0 );
432
+ if (size == MAX_CAPACITY ) {
433
+ throw new IllegalStateException ("BytesToBytesMap has reached maximum capacity" );
434
+ }
420
435
// Here, we'll copy the data into our data pages. Because we only store a relative offset from
421
436
// the key address instead of storing the absolute address of the value, the key and value
422
437
// must be stored in the same memory page.
@@ -468,7 +483,7 @@ public void putNewKey(
468
483
longArray .set (pos * 2 + 1 , keyHashcode );
469
484
updateAddressesAndSizes (storedKeyAddress );
470
485
isDefined = true ;
471
- if (size > growthThreshold ) {
486
+ if (size > growthThreshold && size < MAX_CAPACITY ) {
472
487
growAndRehash ();
473
488
}
474
489
}
@@ -481,7 +496,9 @@ public void putNewKey(
481
496
* @param capacity the new map capacity
482
497
*/
483
498
private void allocate (int capacity ) {
484
- capacity = Math .max ((int ) Math .min (Integer .MAX_VALUE , nextPowerOf2 (capacity )), 64 );
499
+ assert (capacity >= 0 );
500
+ // The capacity needs to be divisible by 64 so that our bit set can be sized properly
501
+ capacity = Math .max ((int ) Math .min (MAX_CAPACITY , nextPowerOf2 (capacity )), 64 );
485
502
longArray = new LongArray (memoryManager .allocate (capacity * 8 * 2 ));
486
503
bitset = new BitSet (MemoryBlock .fromLongArray (new long [capacity / 64 ]));
487
504
@@ -556,7 +573,8 @@ int getNumDataPages() {
556
573
/**
557
574
* Grows the size of the hash table and re-hash everything.
558
575
*/
559
- private void growAndRehash () {
576
+ @ VisibleForTesting
577
+ void growAndRehash () {
560
578
long resizeStartTime = -1 ;
561
579
if (enablePerfMetrics ) {
562
580
resizeStartTime = System .nanoTime ();
@@ -567,7 +585,7 @@ private void growAndRehash() {
567
585
final int oldCapacity = (int ) oldBitSet .capacity ();
568
586
569
587
// Allocate the new data structures
570
- allocate (Math .min (Integer . MAX_VALUE , growthStrategy .nextCapacity (oldCapacity )));
588
+ allocate (Math .min (growthStrategy .nextCapacity (oldCapacity ), MAX_CAPACITY ));
571
589
572
590
// Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
573
591
for (int pos = oldBitSet .nextSetBit (0 ); pos >= 0 ; pos = oldBitSet .nextSetBit (pos + 1 )) {
0 commit comments