Skip to content

Commit

Permalink
Unwind just the "pseudorandom probing" part of recent sets/tables changes (#13816)
Browse files Browse the repository at this point in the history

* Unwind just the "pseudorandom probing" (whole hash-code-keyed variable
stride double hashing) part of the recent sets & tables changes, which has
still been causing bugs over a month later (e.g., two days ago,
#13794) and which still contains
several "figure this out" implementation question comments (see
the diffs of this PR).

This topic has been discussed in many places:
  #13393
  #13418
  #13440
  #13794

Alternative/non-mandatory stronger integer hashes (or vice-versa opt-in
identity hashes) are a better solution that is more general (no illusion
of one hard-coded sequence solving all problems) while retaining the
virtues of linear probing such as cache obliviousness and age-less tables
under delete-heavy workloads (still untested after a month of this change).

The only real solution for truly adversarial keys is a hash keyed off of
data unobservable to attackers.  That all fits better with a few families
of user-pluggable/define-switchable hashes which can be provided in a
separate PR more about `hashes.nim`.

This PR carefully preserves the better (but still hard-coded!) probing
of `intsets` and other recent fixes such as the `move` annotations, the hash
order invariant tests, the `intsets.missingOrExcl` fix, and the move of
`rightSize` into `hashcommon.nim`.

* Fix `data.len` -> `dataLen` problem.
  • Loading branch information
c-blake authored Mar 31, 2020
1 parent 7abeba6 commit b1aa3b1
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 175 deletions.
80 changes: 17 additions & 63 deletions lib/pure/collections/hashcommon.nim
Original file line number Diff line number Diff line change
Expand Up @@ -18,87 +18,43 @@ when not defined(nimHasDefault):
var v: T
v

const freeMarker = 0
const deletedMarker = -1

type UHash = uint

# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These
# two procs retain clarity of that encoding without the space cost of an enum.
proc isFilledAndValid(hcode: Hash): bool {.inline.} =
result = hcode != 0 and hcode != deletedMarker
# performance: we could use bit magic if needed
proc isEmpty(hcode: Hash): bool {.inline.} =
result = hcode == 0

proc isFilled(hcode: Hash): bool {.inline.} =
result = hcode != 0


proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} =
result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask))

proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
# an optimization would be to use `(h + 1) and maxHash` for a few iterations
# and then switch to the formula below, to get "best of both worlds": good
# cache locality, except when a collision cluster is detected (ie, large number
# of iterations).
const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)`
result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
perturb = perturb shr PERTURB_SHIFT
proc nextTry(h, maxHash: Hash): Hash {.inline.} =
result = (h + 1) and maxHash

proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.dataLen
assert(length > counter2)
result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize`
assert(t.dataLen > t.counter)
result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4)

proc rightSize*(count: Natural): int {.inline.} =
## Return the value of `initialSize` to support `count` items.
## Return the value of ``initialSize`` to support ``count`` items.
##
## If more items are expected to be added, simply add that
## expected extra amount to the parameter before calling this.
##
## Internally, we want `mustRehash(t) == false` for t that was just resized.
#
# Make sure to synchronize with `mustRehash`
result = nextPowerOfTwo(count * 3 div 2 + 4)

template getPerturb(t: typed, hc: Hash): UHash =
# we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors
# so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines)
let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2
# this makes a major difference for cases like #13393; it causes the bits
# that were masked out in 1st position so they'll be masked in instead, and
# influence the recursion in nextTry earlier rather than later.
translateBits(cast[uint](hc), numBitsMask)

template rawGetKnownHCImpl() {.dirty.} =
if t.dataLen == 0:
return -1
var h: Hash = hc and maxHash(t) # start with real hash value
var perturb = t.getPerturb(hc)
var deletedIndex = -1
while true:
if isFilledAndValid(t.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra Hash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
# performance: we optimize this: depending on type(key), skip hc comparison
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t), perturb)
elif t.data[h].hcode == deletedMarker:
if deletedIndex == -1:
deletedIndex = h
h = nextTry(h, maxHash(t), perturb)
else:
break
if deletedIndex == -1:
result = -1 - h # < 0 => MISSING; insert idx = -1 - result
else:
# we prefer returning a (in fact the 1st found) deleted index
result = -1 - deletedIndex
while isFilled(t.data[h].hcode):
# Compare hc THEN key with boolean short circuit. This makes the common case
# zero ==key's for missing (e.g.inserts) and exactly one ==key for present.
# It does slow down succeeding lookups by one extra Hash cmp&and..usually
# just a few clock cycles, generally worth it for any non-integer-like A.
if t.data[h].hcode == hc and t.data[h].key == key:
return h
h = nextTry(h, maxHash(t))
result = -1 - h # < 0 => MISSING; insert idx = -1 - result

proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} =
rawGetKnownHCImpl()
Expand All @@ -107,8 +63,6 @@ template genHashImpl(key, hc: typed) =
hc = hash(key)
if hc == 0: # This almost never taken branch should be very predictable.
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine.
elif hc == deletedMarker:
hc = 214159261

template genHash(key: typed): Hash =
var res: Hash
Expand Down
8 changes: 2 additions & 6 deletions lib/pure/collections/intsets.nim
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,16 @@ type
IntSet* = object ## An efficient set of `int` implemented as a sparse bit set.
elems: int # only valid for small numbers
counter, max: int
countDeleted: int
head: PTrunk
data: TrunkSeq
a: array[0..33, int] # profiling shows that 34 elements are enough

proc mustRehash[T](t: T): bool {.inline.} =
# FACTOR between hashcommon.mustRehash, intsets.mustRehash
let counter2 = t.counter + t.countDeleted
let length = t.max + 1
assert length > counter2
result = (length * 2 < counter2 * 3) or (length - counter2 < 4)
assert length > t.counter
result = (length * 2 < t.counter * 3) or (length - t.counter < 4)

proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} =
# FACTOR between hashcommon.nextTry, intsets.nextTry
const PERTURB_SHIFT = 5
var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT
perturb = cast[Hash](perturb2)
Expand Down
28 changes: 20 additions & 8 deletions lib/pure/collections/setimpl.nim
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,10 @@ proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A,

proc enlarge[A](s: var HashSet[A]) =
var n: KeyValuePairSeq[A]
newSeq(n, s.counter.rightSize)
newSeq(n, len(s.data) * growthFactor)
swap(s.data, n) # n is now old seq
s.countDeleted = 0
for i in countup(0, high(n)):
if isFilledAndValid(n[i].hcode):
if isFilled(n[i].hcode):
var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode)
rawInsert(s, s.data, n[i].key, n[i].hcode, j)

Expand Down Expand Up @@ -69,6 +68,11 @@ template containsOrInclImpl() {.dirty.} =
rawInsert(s, s.data, key, hc, -1 - index)
inc(s.counter)

template doWhile(a, b) =
while true:
b
if not a: break

proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
var hc: Hash
var i = rawGet(s, key, hc)
Expand All @@ -78,9 +82,17 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} =
if i >= 0:
result = false
dec(s.counter)
inc(s.countDeleted)
s.data[i].hcode = deletedMarker
s.data[i].key = default(type(s.data[i].key))
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
s.data[i].hcode = 0 # mark current EMPTY
s.data[i].key = default(type(s.data[i].key))
doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
i = (i + 1) and msk # increment mod table size
if isEmpty(s.data[i].hcode): # end of collision cluster; So all done
return
r = s.data[i].hcode and msk # "home" location of key@i
s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop

template dollarImpl() {.dirty.} =
result = "{"
Expand Down Expand Up @@ -113,7 +125,7 @@ proc enlarge[A](s: var OrderedSet[A]) =
swap(s.data, n)
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
if isFilled(n[h].hcode):
var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode)
rawInsert(s, s.data, n[h].key, n[h].hcode, j)
h = nxt
Expand All @@ -131,7 +143,7 @@ proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} =
result = true
while h >= 0:
var nxt = n[h].next
if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used
if isFilled(n[h].hcode):
if n[h].hcode == hc and n[h].key == key:
dec s.counter
result = false
Expand Down
14 changes: 6 additions & 8 deletions lib/pure/collections/sets.nim
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ type
## before calling other procs on it.
data: KeyValuePairSeq[A]
counter: int
countDeleted: int

type
OrderedKeyValuePair[A] = tuple[
Expand All @@ -81,7 +80,6 @@ type
## <#initOrderedSet,int>`_ before calling other procs on it.
data: OrderedKeyValuePairSeq[A]
counter, first, last: int
countDeleted: int

const
defaultInitialSize* = 64
Expand Down Expand Up @@ -249,7 +247,7 @@ iterator items*[A](s: HashSet[A]): A =
## echo b
## # --> {(a: 1, b: 3), (a: 0, b: 4)}
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode): yield s.data[h].key
if isFilled(s.data[h].hcode): yield s.data[h].key

proc containsOrIncl*[A](s: var HashSet[A], key: A): bool =
## Includes `key` in the set `s` and tells if `key` was already in `s`.
Expand Down Expand Up @@ -341,7 +339,7 @@ proc pop*[A](s: var HashSet[A]): A =
doAssertRaises(KeyError, echo s.pop)

for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode):
if isFilled(s.data[h].hcode):
result = s.data[h].key
excl(s, result)
return result
Expand Down Expand Up @@ -574,8 +572,7 @@ proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] =
proc hash*[A](s: HashSet[A]): Hash =
## Hashing of HashSet.
for h in 0 .. high(s.data):
if isFilledAndValid(s.data[h].hcode):
result = result xor s.data[h].hcode
result = result xor s.data[h].hcode
result = !$result

proc `$`*[A](s: HashSet[A]): string =
Expand All @@ -593,6 +590,7 @@ proc `$`*[A](s: HashSet[A]): string =
## # --> {no, esc'aping, is " provided}
dollarImpl()


proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated:
"Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize)

Expand Down Expand Up @@ -624,7 +622,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} =
var idx = 0
while h >= 0:
var nxt = s.data[h].next
if isFilledAndValid(s.data[h].hcode):
if isFilled(s.data[h].hcode):
yieldStmt
inc(idx)
h = nxt
Expand Down Expand Up @@ -858,7 +856,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool =
while h >= 0 and g >= 0:
var nxh = s.data[h].next
var nxg = t.data[g].next
if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode):
if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode):
if s.data[h].key == t.data[g].key:
inc compared
else:
Expand Down
4 changes: 1 addition & 3 deletions lib/pure/collections/sharedtables.nim
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ type
SharedTable*[A, B] = object ## generic hash SharedTable
data: KeyValuePairSeq[A, B]
counter, dataLen: int
countDeleted: int
lock: Lock

template maxHash(t): untyped = t.dataLen-1
Expand All @@ -50,10 +49,9 @@ proc enlarge[A, B](t: var SharedTable[A, B]) =
for i in 0..<oldSize:
let eh = n[i].hcode
if isFilled(eh):
var perturb = t.getPerturb(eh)
var j: Hash = eh and maxHash(t)
while isFilled(t.data[j].hcode):
j = nextTry(j, maxHash(t), perturb)
j = nextTry(j, maxHash(t))
rawInsert(t, t.data, n[i].key, n[i].val, eh, j)
deallocShared(n)

Expand Down
42 changes: 24 additions & 18 deletions lib/pure/collections/tableimpl.nim
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,21 @@ include hashcommon
template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add
genHashImpl(key, hc)
var h: Hash = hc and maxHash(t)
var perturb = t.getPerturb(hc)
while true:
let hcode = t.data[h].hcode
if hcode == deletedMarker or hcode == freeMarker:
break
else:
h = nextTry(h, maxHash(t), perturb)
while isFilled(t.data[h].hcode):
h = nextTry(h, maxHash(t))
result = h

template rawInsertImpl(t) {.dirty.} =
template rawInsertImpl() {.dirty.} =
data[h].key = key
data[h].val = val
if data[h].hcode == deletedMarker:
t.countDeleted.dec
data[h].hcode = hc

proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} =
rawGetDeepImpl()

proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B],
key: A, val: B, hc: Hash, h: Hash) =
rawInsertImpl(t)
rawInsertImpl()

template checkIfInitialized() =
when compiles(defaultInitialSize):
Expand All @@ -51,6 +44,7 @@ template addImpl(enlarge) {.dirty.} =
inc(t.counter)

template maybeRehashPutImpl(enlarge) {.dirty.} =
checkIfInitialized()
if mustRehash(t):
enlarge(t)
index = rawGetKnownHC(t, key, hc)
Expand Down Expand Up @@ -88,11 +82,24 @@ template delImplIdx(t, i) =
let msk = maxHash(t)
if i >= 0:
dec(t.counter)
inc(t.countDeleted)
t.data[i].hcode = deletedMarker
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
# mustRehash + enlarge not needed because counter+countDeleted doesn't change
block outer:
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
t.data[i].hcode = 0 # mark current EMPTY
t.data[i].key = default(type(t.data[i].key))
t.data[i].val = default(type(t.data[i].val))
while true:
i = (i + 1) and msk # increment mod table size
if isEmpty(t.data[i].hcode): # end of collision cluster; So all done
break outer
r = t.data[i].hcode and msk # "home" location of key@i
if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
break
when defined(js):
t.data[j] = t.data[i]
else:
t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop

template delImpl() {.dirty.} =
var hc: Hash
Expand All @@ -108,7 +115,6 @@ template clearImpl() {.dirty.} =
t.counter = 0

template ctAnd(a, b): bool =
# pending https://github.com/nim-lang/Nim/issues/13502
when a:
when b: true
else: false
Expand All @@ -126,7 +132,7 @@ template initImpl(result: typed, size: int) =
result.last = -1

template insertImpl() = # for CountTable
checkIfInitialized()
if t.dataLen == 0: initImpl(t, defaultInitialSize)
if mustRehash(t): enlarge(t)
ctRawInsert(t, t.data, key, val)
inc(t.counter)
Expand Down
Loading

0 comments on commit b1aa3b1

Please sign in to comment.