nim-lang · timotheecour · May 19, 2021
diff --git a/changelog.md b/changelog.md
@@ -139,11 +139,13 @@
 
 - Added `std/enumutils` module. Added `genEnumCaseStmt` macro that generates case statement to parse string to enum.
   Added `items` for enums with holes.
-  Added `symbolName` to return the enum symbol name ignoring the human readable name.
-  Added `symbolRank` to return the index in which an enum member is listed in an enum.
+  Added `symbolName` to return the enum symbol name ignoring the human readable name (`O(1)` cost).
+  Added `symbolRank` to return the index in which an enum member is listed in an enum (`O(1)` cost).
 
 - Added `typetraits.HoleyEnum` for enums with holes, `OrdinalEnum` for enums without holes.
 
+- Added a (private for now) `std/private/lookuptables` module for writing efficient lookup tables.
+
 - Removed deprecated `iup` module from stdlib, it has already moved to
   [nimble](https://github.com/nim-lang/iup).
 

diff --git a/lib/pure/math.nim b/lib/pure/math.nim
@@ -303,6 +303,8 @@ func isPowerOfTwo*(x: int): bool =
 
   return (x > 0) and ((x and (x - 1)) == 0)
 
+from std/private/lookuptables import nil
+
 func nextPowerOfTwo*(x: int): int =
   ## Returns `x` rounded up to the nearest power of two.
   ##
@@ -315,18 +317,7 @@ func nextPowerOfTwo*(x: int): int =
     doAssert nextPowerOfTwo(5) == 8
     doAssert nextPowerOfTwo(0) == 1
     doAssert nextPowerOfTwo(-16) == 1
-
-  result = x - 1
-  when defined(cpu64):
-    result = result or (result shr 32)
-  when sizeof(int) > 2:
-    result = result or (result shr 16)
-  when sizeof(int) > 1:
-    result = result or (result shr 8)
-  result = result or (result shr 4)
-  result = result or (result shr 2)
-  result = result or (result shr 1)
-  result += 1 + ord(x <= 0)
+  lookuptables.nextPowerOfTwo(x)
 
 func sum*[T](x: openArray[T]): T =
   ## Computes the sum of the elements in `x`.

diff --git a/lib/std/enumutils.nim b/lib/std/enumutils.nim
@@ -86,48 +86,26 @@ iterator items*[T: HoleyEnum](E: typedesc[T]): T =
     assert B[float].toSeq == [B[float].b0, B[float].b1]
   for a in enumFullRange(E): yield a
 
-func span(T: typedesc[HoleyEnum]): int =
-  (T.high.ord - T.low.ord) + 1
-
-const invalidSlot = uint8.high
-
-proc genLookup[T: typedesc[HoleyEnum]](_: T): auto =
-  const n = span(T)
-  var ret: array[n, uint8]
-  var i = 0
-  assert n <= invalidSlot.int
-  for ai in mitems(ret): ai = invalidSlot
-  for ai in items(T):
-    ret[ai.ord - T.low.ord] = uint8(i)
-    inc(i)
-  return ret
+import std/private/lookuptables
 
 func symbolRankImpl[T](a: T): int {.inline.} =
-  const n = T.span
-  const thres = 255 # must be <= `invalidSlot`, but this should be tuned.
-  when n <= thres:
-    const lookup = genLookup(T)
-    let lookup2 {.global.} = lookup # xxx improve pending https://github.com/timotheecour/Nim/issues/553
-    #[
-    This could be optimized using a hash adapted to `T` (possible since it's known at CT)
-    to get better key distribution before indexing into the lookup table table.
-    ]#
-    {.noSideEffect.}: # because it's immutable
-      let ret = lookup2[ord(a) - T.low.ord]
-    if ret != invalidSlot: return ret.int
-  else:
-    var i = 0
-    # we could also generate a case statement as optimization
-    for ai in items(T):
-      if ai == a: return i
-      inc(i)
+  const lut = (proc(): auto = # pending https://github.com/nim-lang/RFCs/issues/276
+    var a: seq[int] # ordinals yielded by `items(T)`; shadows the parameter `a`
+    for ai in items(T): a.add ai.ord
+    a.initLookupTable)() # invoked on the spot: `lut` is computed at compile time
+  let lut2 {.global.} = lut
+    # xxx improve pending https://github.com/timotheecour/Nim/issues/553
+  {.noSideEffect.}: # because it's immutable
+    let ret = lut2.lookup(a.ord)
+  if ret != -1: return ret # `-1` is the "not found" result of `lookup`
   raise newException(IndexDefect, $ord(a) & " invalid for " & $T)
 
 template symbolRank*[T: enum](a: T): int =
-  ## Returns the index in which `a` is listed in `T`.
-  ##
-  ## The cost for a `HoleyEnum` is implementation defined, currently optimized
-  ## for small enums, otherwise is `O(T.enumLen)`.
+  ## Returns the index in which `a` is listed in `T` with `O(1)` cost even
+  ## for `HoleyEnum`.
+  # The `O(1)` cost for `HoleyEnum` should hold except for adversarially designed
+  # holey enums; if that becomes a problem we could fix it by optimizing over
+  # `lookuptables.pseudoRandomMixing` since `T` is known at CT.
   runnableExamples:
     type
       A = enum a0 = -3, a1 = 10, a2, a3 = (20, "f3Alt") # HoleyEnum
@@ -145,8 +123,6 @@ template symbolRank*[T: enum](a: T): int =
 
 func symbolName*[T: enum](a: T): string =
   ## Returns the symbol name of an enum.
-  ##
-  ## This uses `symbolRank`.
   runnableExamples:
     type B = enum
       b0 = (10, "kb0")

diff --git a/lib/std/private/lookuptables.nim b/lib/std/private/lookuptables.nim
@@ -0,0 +1,81 @@
+#[
+Experimental API, subject to change.
+
+## benchmark
+see tests/benchmarks/tlookuptables.nim
+
+## design goals
+* low level module with few/no dependencies, which can be used in other modules;
+  this precludes importing math, tables, hashes.
+* high performance (faster than `std/tables`)
+* avoid the complexity of tables.nim + friends (but could serve as building block for it)
+]#
+
+func nextPowerOfTwo*(x: int): int {.inline.} =
+  ## Rounds `x` up to a power of two, giving `1` for `x <= 0`; the public
+  ## documentation lives (for now) in `math.nextPowerOfTwo`.
+  var bits = x - 1
+  # OR-ing with ever smaller right shifts of itself sets every bit below the
+  # highest set one; the final increment then gives the next power of two.
+  when defined(cpu64): bits = bits or (bits shr 32)
+  when sizeof(int) > 2: bits = bits or (bits shr 16)
+  when sizeof(int) > 1: bits = bits or (bits shr 8)
+  bits = bits or (bits shr 4)
+  bits = bits or (bits shr 2)
+  bits = bits or (bits shr 1)
+  bits + (if x <= 0: 2 else: 1)
+
+type
+  SimpleHash* = uint
+    # math works out better with `uint` than with
+    # `int` as done in `hashes.Hash`
+  LookupTable*[T] = object
+    cells*: seq[SimpleHash] ## `0` for an empty cell, else rank of the key + 1
+    keys*: seq[T] ## `keys[i]` is the key stored in cell `i`
+
+const pseudoRandomMixing = 5
+  # this could be chosen to minimize the expected number of calls to
+  # `nextCell`, if the key distribution is known; it must stay congruent to 1
+  # modulo 4 so that probing keeps visiting every cell (see `nextCell`).
+
+template nextCell(cur, m): untyped =
+  ## Cell to probe after cell `cur`; `m` is the table size (a power of 2)
+  ## minus 1. Iterating this from any cell cycles through all the cells.
+  (cur * pseudoRandomMixing + 1) and m
+
+template simpleHash[T](a: T): SimpleHash =
+  cast[SimpleHash](a)
+
+proc initLookupTable*[T](a: openArray[T]): LookupTable[T] =
+  ## Returns a lookup table mapping each element of `a` to its position in
+  ## `a`, see `lookup`. A duplicated element maps to its first position.
+  let size = max(2, nextPowerOfTwo(a.len * 3 div 2)) # always > `a.len`
+  result.cells.setLen size
+  result.keys.setLen size
+  let m = SimpleHash(size - 1)
+  var i = 1'u # position + 1, because `0` marks an empty cell
+  for ai in a:
+    var index = ai.simpleHash and m
+    # Step from the cell index; stepping from the cell content could loop.
+    while result.cells[index] != 0: index = nextCell(index, m)
+    result.cells[index] = i
+    result.keys[index] = ai
+    inc(i)
+
+proc lookup*[T](tab: LookupTable[T], key: T): int =
+  ## Returns `-1` if `key` is not found (or if `tab` is uninitialized), else
+  ## the position of `key` in the input that was given to `initLookupTable`.
+  runnableExamples:
+    let a = @[100.0, 0.0, 13.3, -3.12]
+    let b = a.initLookupTable
+    assert b.lookup(13.3) == 2 # found at index 2
+    assert b.lookup(0.3) == -1 # not found
+    assert @[1, 5].initLookupTable.lookup(5) == 1 # colliding keys
+  if tab.cells.len == 0: return -1
+  let m = SimpleHash(tab.cells.len - 1)
+  var index = key.simpleHash and m
+  while true:
+    let h = tab.cells[index]
+    if h == 0: return -1
+    if tab.keys[index] == key: return cast[int](h - 1)
+    index = nextCell(index, m) # same probing as in `initLookupTable`
diff --git a/tests/benchmarks/tlookuptables.nim b/tests/benchmarks/tlookuptables.nim
@@ -0,0 +1,78 @@
+discard """
+  joinable: false
+"""
+
+#[
+nim r -d:numIter:10000000 -d:danger tests/benchmarks/tlookuptables.nim
+on OSX:
+lookup       genData1 1.111774           2100000000
+lookupTables genData1 2.469191           2100000000
+lookupNaive  genData1 1.580014           2100000000
+lookup       genData2 0.5989679999999993 12250000000
+lookupTables genData2 1.704296           12250000000
+lookupNaive  genData2 4.306558000000001  12250000000
+]#
+
+import std/[times, tables, strutils]
+import std/private/lookuptables
+import std/private/asciitables
+
+const numIter {.intDefine.} = 100
+
+proc lookupTables[T](a: Table[T, int], key: T): int =
+  result = a[key] # `std/tables` baseline; `[]` raises `KeyError` if absent
+
+proc lookupNaive[T](a: seq[T], key: T): int =
+  ## Baseline linear scan: position of the first element equal to `key`,
+  ## or `-1` when `a` does not contain it.
+  a.find(key)
+
+proc genData1(): seq[int] =
+  ## Small data set, to check the performance on few keys.
+  @[100, 13, 15, 12, 0, -3, 44]
+
+proc genData2(): seq[int] =
+  ## Data set of size 50: the multiples of 37 from `0` to `49 * 37`.
+  result = newSeqOfCap[int](50)
+  for multiple in countup(0, 49 * 37, 37): result.add multiple
+
+var msg = ""
+
+template mainAux(genData, algo) = # times `algo` on the keys of `genData()`
+  const genDataName = astToStr(genData)
+  when genDataName == "genData1": (let factor = 10) # more passes on small data
+  elif genDataName == "genData2": (let factor = 1)
+  else: static: doAssert false, genDataName
+
+  let a = genData()
+  const name = astToStr(algo) # selects at compile time how `tab` is built
+  when name == "lookup":
+    let tab = initLookupTable(a)
+  elif name == "lookupNaive":
+    template tab: untyped = a # the naive scan searches the data itself
+  elif name == "lookupTables":
+    var tab: Table[int, int]
+    for i, ai in a:
+      tab[ai] = i # key -> position in `a`
+  else: static: doAssert false, name
+  let t = cpuTime()
+  var c = 0 # sum of all the lookup results, reported next to the timing
+  let n = numIter * factor
+  for i in 0..<n:
+    for ai in a:
+      c += algo(tab, ai)
+  let t2 = cpuTime()-t # elapsed CPU time of the lookup loop only
+  let msgi = "$#\t$#\t$#\t$#" % [name, genDataName, $t2, $c]
+  echo msgi # show intermediate progress
+  msg.add msgi & "\n" # accumulated for the final summary
+
+template main2(genData) = # benchmarks the three algorithms on `genData()`
+  mainAux(genData, lookup)
+  mainAux(genData, lookupTables)
+  mainAux(genData, lookupNaive)
+
+proc main() = # runs both data sets, then echoes `msg` through `alignTable`
+  main2(genData1)
+  main2(genData2)
+  echo "---\n" & msg.alignTable
+main()