robinhood hashing
timotheecour committed Feb 20, 2020
1 parent 3385027 commit 96a2d49
Showing 3 changed files with 55 additions and 4 deletions.
47 changes: 46 additions & 1 deletion lib/pure/collections/hashcommon.nim
@@ -20,6 +20,8 @@ when not defined(nimHasDefault):
 
 const freeMarker = 0
 const deletedMarker = -1
+type CMode = enum krobin, kmix
+const cmode = krobin
 
 type UHash = uint
 
@@ -73,6 +75,37 @@ template getPerturb(t: typed, hc: Hash): UHash =
   # influence the recursion in nextTry earlier rather than later.
   translateBits(cast[uint](hc), numBitsMask)
 
+template robinHoodGet(t: typed, hc, mustNextTry, key0): int =
+  let m = maxHash(t)
+  var index: Hash = hc and m
+  while mustNextTry(t.data[index], index):
+    # TODO: can short circuit/optimize using depth
+    index = (index + 1) and m
+  index
+
+template robinHoodInsert(t: typed, hc, mustNextTry, key0, val0): int =
+  static: doAssert cmode == krobin
+  let m = maxHash(t)
+  var index: Hash = hc and m
+  var depth = 0
+  while mustNextTry(t.data[index], index):
+    depth.inc
+    let indexi = t.data[index].hcode and m
+    var depthi = index - indexi
+    if depthi < 0: depthi = depthi + m + 1
+    # debugecho (depth, depthi, depth > depthi)
+    # if depth > depthi and false:
+    if depth > depthi:
+      # steal!
+      when compiles(t.data[index].val): # eg: not for HashSet
+        swap(t.data[index].val, val0)
+      swap(t.data[index].key, key0)
+      swap(t.data[index].hcode, hc)
+      depth = depthi
+    index = (index + 1) and m
+    # debugecho (depth, index, key0, val0, hc)
+  index
+
 template findCell(t: typed, hc, mustNextTry): int =
   let m = maxHash(t)
   var index: Hash = hc and m
@@ -103,6 +136,7 @@ template findCell(t: typed, hc, mustNextTry): int =
 template rawGetKnownHCImpl() {.dirty.} =
   if t.dataLen == 0: return -1
   var deletedIndex = -1
+
   template mustNextTry(cell, index): bool =
     if isFilledAndValid(cell.hcode):
       # Compare hc THEN key with boolean short circuit. This makes the common case
@@ -121,7 +155,15 @@ template rawGetKnownHCImpl() {.dirty.} =
       true
     else: false
 
-  let index = findCell(t, hc, mustNextTry)
+  when compiles(t.data[0].val) and cmode == krobin:
+    # IMPROVE
+    when compiles(val):
+      let index = robinHoodInsert(t, hc, mustNextTry, key, val)
+    else:
+      let index = robinHoodGet(t, hc, mustNextTry, key)
+  else:
+    let index = findCell(t, hc, mustNextTry)
+
   if deletedIndex == -2:
     result = index
   elif deletedIndex == -1:
@@ -151,3 +193,6 @@ template rawGetImpl() {.dirty.} =
 
 proc rawGet[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
   rawGetImpl()
+
+proc rawPutAux[X, A, B](t: var X, key: var A, hc: var Hash, val: var B): int {.inline.} =
+  rawGetKnownHCImpl()
5 changes: 4 additions & 1 deletion lib/pure/collections/tableimpl.nim
@@ -61,7 +61,10 @@ template maybeRehashPutImpl(enlarge) {.dirty.} =
 template putImpl(enlarge) {.dirty.} =
   checkIfInitialized()
   var hc: Hash
-  var index = rawGet(t, key, hc)
+  genHashImpl(key, hc)
+  var val = val
+  var key = key
+  var index = rawPutAux(t, key, hc, val)
   if index >= 0: t.data[index].val = val
   else: maybeRehashPutImpl(enlarge)
 
7 changes: 5 additions & 2 deletions lib/pure/collections/tables.nim
@@ -270,10 +270,13 @@ proc enlarge[A, B](t: var Table[A, B]) =
   swap(t.data, n)
   t.countDeleted = 0
   for i in countup(0, high(n)):
-    let eh = n[i].hcode
+    var eh = n[i].hcode
     if isFilledAndValid(eh):
       template mustNextTry(cell, index): bool = isFilled(cell.hcode)
-      let j = findCell(t, eh, mustNextTry)
+      when cmode == krobin:
+        let j = robinHoodInsert(t, eh, mustNextTry, n[i].key, n[i].val)
+      else:
+        let j = findCell(t, eh, mustNextTry)
       when defined(js):
         rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
       else:

6 comments on commit 96a2d49

@c-blake

Well, this looks ok. What I was doing myself was something like this:

template rawInsertImpl() {.dirty.} =
  if data.len == 0:
    initImpl(s, defaultInitialSize)
  var i = h                       # Linear probe to first empty slot
  while isFilled(data[i].hcode):
    i = (i + 1) and data.high
  if i > h:                       # No table wrap around; just shift up
    pushUp data, h, i - h
  elif i < h:                     # i wrapped to low indices
    pushUp data, 0, i
    data[0] = data[data.high]
    pushUp data, h, data.high - h
  #else: i == h => already have space at h; nothing to do
  data[h].key = key
  data[h].hcode = hc

where pushUp is in hashcommon and is like:

proc pushUp[T](x: var seq[T], i, n: int) {.inline.} = # move n elts up 1 idx
  if n < 1: return
  when defined(rhhMoveMem):
    moveMem x[i + 1].addr, x[i].addr, n * T.sizeof
  else:
    for j in countdown(i + n - 1, i): x[j+1] = x[j]   # XXX `move`?

and hashcommon also changes to be like this:

proc depth(h, hcode, msk: Hash): Hash {.inline.} =
  (msk + 1 + h - hcode) and msk

template rawGetKnownHCImpl() {.dirty.} =
  if t.dataLen == 0:
    return -1 - (hc and (defaultInitialSize - 1))
  var h: Hash = hc and t.maxHash # start with real hash value
  var d: Hash = 0 # Running search depth
  while isFilled(t.data[h].hcode) and depth(h, t.data[h].hcode, t.maxHash) >= d:
    when type(key) is SomeInteger:
      when defined(hashStats):
        hashDepth.inc
      if t.data[h].key == key:
        return h
    else:
      when defined(hashStats):
        t.depth.inc
      if t.data[h].hcode == hc and t.data[h].key == key:
        return h
    h = (h + 1) and t.maxHash
    d.inc
  result = -1 - h   # < 0 => MISSING and insert idx = -1 - result
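
To make the encoding in that last line concrete, here is a tiny self-contained demonstration (helper names are hypothetical, purely illustrative):

proc packMissing(h: int): int = -1 - h      # encode "missing; insert at slot h"
proc decodeSlot(res: int): int = -1 - res   # recover h from a negative result

assert packMissing(5) == -6 and decodeSlot(-6) == 5
assert packMissing(0) == -1   # "missing, insert at 0" (-1) stays distinct from "found at 0" (0)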

@c-blake

I was just working with sets for simplicity while it seems you were working with tables as a POC. There is similar code for deletion with a flipped-around pushDown (sketched below). It's not a complex algorithm - mostly inserting and deleting from an ordered list, just ordered by probe depth and modulo the table size (which can actually be simplified to ordering by hash value with an overflow area at the end of the masked part of data[], IF you can assume a good hash function). That said, it won't really solve the problem jyapayne found with an identity hash and only about 2 distinct hash values, the way your Python-copied pseudo-random probe can, and there is a gotcha or two.
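
A minimal sketch of what such a pushDown could look like (hypothetical; just the mirror image of the pushUp above, reusing its rhhMoveMem define):

proc pushDown[T](x: var seq[T], i, n: int) {.inline.} = # move n elts down 1 idx
  if n < 1: return
  when defined(rhhMoveMem):
    moveMem x[i].addr, x[i + 1].addr, n * T.sizeof
  else:
    for j in countup(i, i + n - 1): x[j] = x[j + 1]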

Also, while code like the above was working fine (and very fast) with a HashSet pre-sized to the correct size, the "grow up by resizing" case was taking much longer - like 2x slower than your pseudo-random probing in-L3. Instrumentation showed search depth growing by a factor of 17x for me instead of 2.7x as with the current pseudo-random probing. I don't think it is quite this problem, but it may be related: https://accidentallyquadratic.tumblr.com/post/153545455987/rust-hash-iteration-reinsertion . (My test set has >200 k entries... so "only 17x" is not big enough to be truly 'quadratic'. I'm not sure what's wrong there yet, but I still assume I can solve it.) That link at least shows another gotcha about triggering resize, with the easy workaround of marking a table as to-be-resized on any overlong cluster, but only if <50% full (sketched below). My current puzzle is during resize from smaller to bigger, where you pick up an extra bit.
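
A sketch of that workaround (all names hypothetical, not from this commit): record a "resize on next insert" flag whenever a probe cluster gets overlong while the table is under half full, since clustering rather than load is then the problem.

type
  GrowState = object      # reduced stand-in for a table, for illustration
    dataLen: int          # allocated slots
    counter: int          # occupied slots
    pendingGrow: bool     # hypothetical to-be-resized flag

const overlongCluster = 8 # assumed probe-depth threshold

proc noteProbeDepth(t: var GrowState, depth: int) =
  # Overlong cluster while <50% full: schedule a grow for the next
  # insert instead of resizing immediately (avoids resize loops).
  if depth > overlongCluster and t.counter * 2 < t.dataLen:
    t.pendingGrow = true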

Also, the way Araq did OrderedSet and OrderedTable is totally disrupted by Robin Hood, because we move elements on insert while his next pointers are table indices. The choices there are to A) make things a doubly-linked list so we can update the pointers as we shift around (sketched below) or B) move to an entirely different impl for the Ordered* variants, neither of which is great. So, if I do solve the growth problem and we did do Robin Hood, we would still need to re-do Ordered* either to take up more space overhead or to re-do it entirely, e.g. the way Python did in the 3.6 series. That has a double DIMM hit kind of baked in (assuming large tables), but then regular, more-common-case Table/HashSet users would not need to suffer the double DIMM hit (often masked in "perfectly hot loops" - you kind of want some real program spending 25% of its time in hash operations, and to back out table performance by the way that % changes across impls).
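
For concreteness, option A amounts to a slot layout roughly like this (a sketch only; as noted above, the current impl keeps a single next index per slot):

import std/hashes

type
  OrderedKeyValuePairDL[A, B] = object
    hcode: Hash
    prev, next: int  # insertion-order links by slot index; with links in
                     # both directions, a Robin Hood shift can re-patch
                     # its neighbors as entries move
    key: A
    val: B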

Another entirely different approach (with or without PR probing, with or without thresholds) is to just use a less bad but almost as fast integer hash, like Fibonacci hashing. It's just one integer multiply and spreads the bits around, with the most mixed bits in the middle (kind of like Pascal's triangle for binomial powers, for similar reasons). A 32-bit*32-bit product is 64 bits, which gets reduced mod 2**32 into something where the top bits are the most mixed, while Nim tables reduce to table addresses with an and-mask instead of, say, a shr. We could rotate(mixingConstant*x, someBits) (a sketch follows). It's funny that it's called Fibonacci hashing - if you read that section in Knuth v3, he starts with the golden ratio, then tells you how it works for any irrational number, and then any actual use case is finite integer arithmetic. So, it's really just any sufficiently mixing multiplier in the end, but it gets labeled "Fibonacci" from the mathematics inspiring Knuth. It's not perfect, but it's a less fragile default hash.
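
A minimal sketch of that idea in Nim (the constant and the rotation amount are assumptions for illustration, not a settled proposal):

import std/bitops

proc fibHash(x: uint64, tableBits: int): uint64 {.inline.} =
  const k = 0x9E3779B97F4A7C15'u64  # ~2^64 / golden ratio
  # The high bits of x*k are the best mixed; rotate them down to the
  # bottom so the usual and-mask (as Nim tables use) lands on them.
  rotateRightBits(x * k, 64 - tableBits)

when isMainModule:
  let mask = (1'u64 shl 10) - 1           # 1024-slot table
  echo fibHash(12345'u64, 10) and mask    # a table address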

If Araq is really insistent on the identity hash instead of just "something fast" that might include something like Fibonacci, Robin Hood cannot really save naive users. Non-naive users could always have defined a better hash and not had a problem. So, it might be better to just have some pre-fetching generous depth threshold like 8, use tombstones, and declare that deletion-heavy workloads are the special case while crappy hash functions are the more common case. (To further expand on my tombstone complaint: besides lengthening collision clusters, they also lower memory utilization. You could easily have some array that was 33% tombstones and 33% occupied yet still performing like it was 66% occupied. So, it's almost a 2x hit on utilization.) Anyway, it's kind of an "unknown meta-distribution of use cases". I'm also fine doing some incubator thing under cligen/ for a deletion-heavy-workload-friendly variant to let us get experience with it. (Also, I sent you an email about cligen-1.0 coming up. Not sure if you have any backward-incompatible thoughts.)

I do think some optional local/total search depth instrumentation could go a long way here, maybe on by default in debug/non-release mode. Maybe even just a writable module attribute like var hashWarnings: File that a debugger could set to stderr. Not sure.

Anyway, thanks for your attention/work on this. It's a pretty complex design space that almost needs diagrams to really explain all the cases. There's clearly been a bit of bitrot in the Nim impl since I last dove deep (my first real contribution was removing the infinite loop caused by a bad delete algo) - at least a check for empty data returning a misleading -1, discordant factoring within sets & tables, and a big split up into several files. I'm less sure there's a one-size-fits-most solution to be had, after spending some time trying to switch to RHH and seeing the Ordered* mess it would make. While B-trees have much stronger guarantees, they are also usually several times slower in average cases with well-distributed hash functions. So, I doubt the hash table allure will ever go away.

@c-blake

Ok. My resizing bug was just a dumb numerator-denominator flip in my mustRehash. Oops.
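
(For context, the predicate in question looks roughly like the sketch below, modeled on the old stdlib version; swapping the two sides of the first comparison produces exactly this kind of flip.)

proc mustRehash(length, counter: int): bool {.inline.} =
  assert length > counter
  # grow when the table is about 2/3 full, or nearly out of free slots
  result = (length * 2 < counter * 3) or (length - counter < 4)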

With that fix, this RHH is 1.4x faster (overall program runtime, not just the hashing part) in-L3 than the random probing approach (without a limited linear probe) currently on HEAD in Nim devel, on my one test dataset (that anagram finder thing on the forum). The boost should be even bigger out-of-L3, but your (very good) linear probe limit idea can probably also rein that boost back in.

I can provide the 180 line prototype impl on request. It is mostly a very stripped down sets.nim + setimpl.nim + hashcommon.nim. Should I email that to you?

Anyway, all the other points mentioned above remain relevant. It definitely breaks OrderedSet, as any rearranging on insert must, but a specialized, optimized separate impl of that may be warranted/wanted anyway. It's more resilient to weak hashes - not nearly as impervious as that Python-esque pseudo-random probing, but more cache-friendly and friendlier to delete-heavy workloads. It has a gotcha in going to smaller tables in the same hash order that is fixable, maybe with a "table-level flag" to resize on the next insert. It's a big ol' bag of engineering trade-offs.

@timotheecour
Owner Author

@timotheecour commented on 96a2d49 Feb 24, 2020 via email

@c-blake

I know one test doesn't cut it... it was just an example, but I also don't think many tests cut it, and we agree it's very hard to define precisely. There are also space-speed tradeoffs. Your benchmarks don't measure memory usage (I like cgmemtime for this, btw, but it might need sysadmin intervention to run on CIs).

It's really hard to have an omnibus average score. Personally, I actually think many varieties should be available (at least limited-LP pseudo-random probing, RH, vanilla LP, maybe Cuckoo), but that's more work, and I'm sure Araq would prefer a B-tree to all of them. Maybe I should just port my B-tree to Nim.

@c-blake

(Of course, for this particular case memory is just data.len and so easy... but cgmemtime is nice if you didn't know about it.)

Please sign in to comment.