Closed
Description
import scala.collection.immutable
object ChampPerformance {
var size: Int = 10000
var existingKeys: Array[Any] = _
var missingKeys: Array[Any] = _
existingKeys = (0 to size).map(i => new Tracer((i % 4) match {
case 0 => i.toString
case 1 => i.toChar
case 2 => i.toDouble
case 3 => i.toInt
})).toArray
missingKeys = (size to 2 * size).toArray
def main(args: Array[String]): Unit = {
check(immutable.ChampHashMap())
check(immutable.HashMap())
}
def check(map: immutable.Map[Any, Any]) = {
val values = existingKeys.map(x => (x, ""))
val map1 = map ++ values
equalsCounter = 0; hashCodeCounter = 0
var i = 0;
while (i < size) {
assert(map1.contains(existingKeys(i)))
assert(!map1.contains(missingKeys(i)))
i += 1
}
println(map1.getClass.getName)
println("equalsCounter = " + equalsCounter)
println("hashCodeCounter = " + hashCodeCounter)
}
var equalsCounter = 0
var hashCodeCounter = 0
class Tracer[A](val a: A) {
override def equals(obj: scala.Any): Boolean = {
obj match {
case t1: Tracer[_] =>
equalsCounter +=1
a == t1.a
case _ => false
}
}
override def hashCode(): Int = {
hashCodeCounter += 1
a.##
}
}
}
scala.collection.immutable.ChampHashMap
equalsCounter = 2342
hashCodeCounter = 20000
scala.collection.immutable.HashMap$HashTrieMap
equalsCounter = 16
hashCodeCounter = 20000
This test uses poorly distributed hash codes of small numbers. ChampHashMap
(like immutable.HashMap
before it) uses the folllowing hash code improvement:
def improve(hcode: Int): Int = {
var h: Int = hcode + ~(hcode << 9)
h = h ^ (h >>> 14)
h = h + (h << 4)
h ^ (h >>> 10)
}
def computeHash(key: Any): Int =
improve(key.##)
Contrast this with the version used in mutable.HashMap
:
/**
* [ ... ]
* The goal is to distribute across bins as well as possible even if a hash code has low entropy at some bits.
* <p/>
* OLD VERSION - quick, but bad for sequence 0-10000 - little entropy in higher bits - since 2003
* {{{
* var h: Int = hcode + ~(hcode << 9)
* h = h ^ (h >>> 14)
* h = h + (h << 4)
* h ^ (h >>> 10)
* }}}
* the rest of the computation is due to SI-5293
*/
protected final def improve(hcode: Int, seed: Int): Int = rotateRight(byteswap32(hcode), seed)
Those calls to equals
occur in map1.contains(missingKeys(i))
.
ChampHashMap
calls key.equals(existingKey)
even those the hash codes differ.
map1.contains(new Tracer(10283))
, which calls Tracer(10283).equals(Tracer(4634.0))
(Tracer(4634.0)
is an existing element in the map).
Notice the similarities in the hash codes:
scala> improve(4634.0.##).toBinaryString
res14: String = 11111101011001010110111001011000
scala> improve(10283.##).toBinaryString
res15: String = 11111010010100000110111001011000
scala> (improve(4634.0.##) ^ improve(10283.##)).toBinaryString
res16: String = 111001101010000000000000000
Leads to:
Comparison with immutable.HashMap
The old implementation first checked that the hash code of the located tree node is identical to the hash code of the key: