Skip to content

Commit

Permalink
add efficient std/private/lookuptables
Browse files Browse the repository at this point in the history
  • Loading branch information
timotheecour committed May 19, 2021
1 parent 53935b8 commit ada3678
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 53 deletions.
6 changes: 4 additions & 2 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,13 @@

- Added `std/enumutils` module. Added `genEnumCaseStmt` macro that generates case statement to parse string to enum.
Added `items` for enums with holes.
Added `symbolName` to return the enum symbol name ignoring the human readable name.
Added `symbolRank` to return the index in which an enum member is listed in an enum.
Added `symbolName` to return the enum symbol name ignoring the human readable name (`O(1)` cost).
Added `symbolRank` to return the index in which an enum member is listed in an enum (`O(1)` cost).

- Added `typetraits.HoleyEnum` for enums with holes, `OrdinalEnum` for enums without holes.

- Added a (private for now) `std/private/lookuptables` for writing efficient lookup tables.

- Removed deprecated `iup` module from stdlib, it has already moved to
[nimble](https://github.com/nim-lang/iup).

Expand Down
15 changes: 3 additions & 12 deletions lib/pure/math.nim
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ func isPowerOfTwo*(x: int): bool =

return (x > 0) and ((x and (x - 1)) == 0)

from std/private/lookuptables import nil

func nextPowerOfTwo*(x: int): int =
## Returns `x` rounded up to the nearest power of two.
##
Expand All @@ -315,18 +317,7 @@ func nextPowerOfTwo*(x: int): int =
doAssert nextPowerOfTwo(5) == 8
doAssert nextPowerOfTwo(0) == 1
doAssert nextPowerOfTwo(-16) == 1

result = x - 1
when defined(cpu64):
result = result or (result shr 32)
when sizeof(int) > 2:
result = result or (result shr 16)
when sizeof(int) > 1:
result = result or (result shr 8)
result = result or (result shr 4)
result = result or (result shr 2)
result = result or (result shr 1)
result += 1 + ord(x <= 0)
lookuptables.nextPowerOfTwo(x)

func sum*[T](x: openArray[T]): T =
## Computes the sum of the elements in `x`.
Expand Down
54 changes: 15 additions & 39 deletions lib/std/enumutils.nim
Original file line number Diff line number Diff line change
Expand Up @@ -86,48 +86,26 @@ iterator items*[T: HoleyEnum](E: typedesc[T]): T =
assert B[float].toSeq == [B[float].b0, B[float].b1]
for a in enumFullRange(E): yield a

func span(T: typedesc[HoleyEnum]): int =
(T.high.ord - T.low.ord) + 1

const invalidSlot = uint8.high

proc genLookup[T: typedesc[HoleyEnum]](_: T): auto =
const n = span(T)
var ret: array[n, uint8]
var i = 0
assert n <= invalidSlot.int
for ai in mitems(ret): ai = invalidSlot
for ai in items(T):
ret[ai.ord - T.low.ord] = uint8(i)
inc(i)
return ret
import std/private/lookuptables

func symbolRankImpl[T](a: T): int {.inline.} =
const n = T.span
const thres = 255 # must be <= `invalidSlot`, but this should be tuned.
when n <= thres:
const lookup = genLookup(T)
let lookup2 {.global.} = lookup # xxx improve pending https://github.com/timotheecour/Nim/issues/553
#[
This could be optimized using a hash adapted to `T` (possible since it's known at CT)
to get better key distribution before indexing into the lookup table table.
]#
{.noSideEffect.}: # because it's immutable
let ret = lookup2[ord(a) - T.low.ord]
if ret != invalidSlot: return ret.int
else:
var i = 0
# we could also generate a case statement as optimization
for ai in items(T):
if ai == a: return i
inc(i)
const lut = (proc(): auto = # pending https://github.com/nim-lang/RFCs/issues/276
var a: seq[int]
for ai in items(T): a.add ai.ord
a.initLookupTable)()
let lut2 {.global.} = lut
# xxx improve pending https://github.com/timotheecour/Nim/issues/553
{.noSideEffect.}: # because it's immutable
let ret = lut2.lookup(a.ord)
if ret != -1: return ret
raise newException(IndexDefect, $ord(a) & " invalid for " & $T)

template symbolRank*[T: enum](a: T): int =
## Returns the index in which `a` is listed in `T`.
##
## The cost for a `HoleyEnum` is implementation defined, currently optimized
## for small enums, otherwise is `O(T.enumLen)`.
## Returns the index in which `a` is listed in `T` with `O(1)` cost even
## for `HoleyEnum`.
# The `O(1)` cost for `HoleyEnum` should hold except for adversarially designed
# holey enums; if that becomes a problem we could fix it by optimizing over
# `lookuptables.pseudoRandomMixing` since `T` is known at CT.
runnableExamples:
type
A = enum a0 = -3, a1 = 10, a2, a3 = (20, "f3Alt") # HoleyEnum
Expand All @@ -145,8 +123,6 @@ template symbolRank*[T: enum](a: T): int =

func symbolName*[T: enum](a: T): string =
## Returns the symbol name of an enum.
##
## This uses `symbolRank`.
runnableExamples:
type B = enum
b0 = (10, "kb0")
Expand Down
81 changes: 81 additions & 0 deletions lib/std/private/lookuptables.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#[
Experimental API, subject to change.
## benchmark
see tests/benchmarks/tlookuptables.nim
## design goals
* low level module with few/no dependencies, which can be used in other modules;
this precludes importing math, tables, hashes.
* high performance (faster than `std/tables`)
* avoid the complexity of tables.nim + friends (but could serve as building block for it)
]#

func nextPowerOfTwo*(x: int): int {.inline.} =
  ## Rounds `x` up to the nearest power of two; public documentation lives
  ## (for now) on `math.nextPowerOfTwo`.
  var smeared = x - 1
  # OR the value with ever larger right-shifts of itself so that every bit
  # below the highest set bit ends up set as well.
  smeared = smeared or (smeared shr 1)
  smeared = smeared or (smeared shr 2)
  smeared = smeared or (smeared shr 4)
  when sizeof(int) > 1:
    smeared = smeared or (smeared shr 8)
  when sizeof(int) > 2:
    smeared = smeared or (smeared shr 16)
  when defined(cpu64):
    smeared = smeared or (smeared shr 32)
  # `ord(x <= 0)` adds the extra 1 needed for non-positive inputs.
  result = smeared + 1 + ord(x <= 0)

type
  SimpleHash* = uint
    # math works out better with `uint` than with
    # int as done in `hash.Hash`
  LookupTable*[T] = object
    ## Open-addressing table mapping a key to its position in the
    ## `openArray` that was passed to `initLookupTable`.
    cells*: seq[SimpleHash] ## `0` = empty cell, else position of the key + 1
    keys*: seq[T] ## `keys[i]` is the key of `cells[i]` when `cells[i] != 0`

const pseudoRandomMixing = 5
  # Multiplier of the probe recurrence in `nextCell`. It could be chosen to
  # minimize the expected number of calls to `nextCell`, if the key
  # distribution is known.

static:
  # `nextCell` only cycles through every cell of a power-of-two sized table
  # when the multiplier is congruent to 1 modulo 4 (and the increment is odd).
  doAssert pseudoRandomMixing mod 4 == 1

template nextCell(index, m): untyped =
  ## Pseudo-random probing: returns the cell to try after `index`, where `m`
  ## is `table size - 1`.
  ##
  ## The recurrence is driven by the cell *index*, never by the cell content:
  ## probing on the stored value can map a cell onto itself and loop forever.
  (index * pseudoRandomMixing + 1) and m

template simpleHash[T](a: T): SimpleHash =
  ## Reinterprets the bits of `a` as a hash value.
  cast[SimpleHash](a)

proc initLookupTable*[T](a: openArray[T]): LookupTable[T] =
  ## Returns a lookup table that supports efficient lookup of the position
  ## of each element of `a`.
  ##
  ## If `a` contains duplicates, `lookup` reports the first occurrence.
  # A power-of-two size lets `and m` replace `mod`; the sizing below always
  # leaves at least one empty cell, which is what terminates probing.
  let size = max(2, nextPowerOfTwo(a.len * 3 div 2))
  result.cells.setLen size
  result.keys.setLen size
  let m = SimpleHash(size - 1)
  var rank = 1'u # stored 1-based because 0 marks an empty cell
  for ai in a:
    var index = ai.simpleHash and m
    while result.cells[index] != 0:
      index = nextCell(index, m)
    result.cells[index] = rank
    result.keys[index] = ai
    inc(rank)

proc lookup*[T](tab: LookupTable[T], key: T): int =
  ## return `-1` if `key` not found, else an index `i`
  ## at which we can find `key`.
  ##
  ## A default-initialized (never built) table contains no key, so `-1` is
  ## returned for it as well.
  runnableExamples:
    let a = @[100.0, 0.0, 13.3, -3.12]
    let b = a.initLookupTable
    assert b.lookup(13.3) == 2 # found at index 2
    assert b.lookup(0.3) == -1 # not found
  if tab.cells.len == 0: return -1
  let m = SimpleHash(tab.cells.len - 1)
  var index = key.simpleHash and m
  while true:
    let h = tab.cells[index]
    if h == 0: return -1 # reached an empty cell: `key` was never inserted
    if tab.keys[index] == key: return int(h - 1)
    index = nextCell(index, m)
78 changes: 78 additions & 0 deletions tests/benchmarks/tlookuptables.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
discard """
joinable: false
"""

#[
nim r -d:numIter:10000000 -d:danger tests/benchmarks/tlookuptables.nim
on OSX:
lookup genData1 1.111774 2100000000
lookupTables genData1 2.469191 2100000000
lookupNaive genData1 1.580014 2100000000
lookup genData2 0.5989679999999993 12250000000
lookupTables genData2 1.704296 12250000000
lookupNaive genData2 4.306558000000001 12250000000
]#

import std/[times, tables, strutils]
import std/private/lookuptables
import std/private/asciitables

# Base number of outer benchmark iterations; override at compile time with
# `-d:numIter:<n>`.
const numIter {.intDefine.} = 100

proc lookupTables[T](a: Table[T, int], key: T): int =
  ## `std/tables` baseline: returns the value stored under `key`.
  result = a[key]

proc lookupNaive[T](a: seq[T], key: T): int =
  ## Linear-scan baseline: position of the first element equal to `key`,
  ## or `-1` when there is none.
  result = -1
  for pos in 0 ..< a.len:
    if a[pos] == key:
      return pos

proc genData1(): seq[int] =
  ## Small, fixed key set used to check performance on small data.
  @[100, 13, 15, 12, 0, -3, 44]

proc genData2(): seq[int] =
  ## Key set of size 50: the multiples of 37 from `0` to `49 * 37`.
  result = newSeq[int](50)
  for i in 0 ..< result.len:
    result[i] = i * 37

# Collects one result row per benchmark run; printed as an aligned table
# once all runs are done.
var msg = ""

template mainAux(genData, algo) =
  ## Benchmarks `algo` on the keys returned by `genData`: echoes one
  ## tab-separated row (algorithm, data set, elapsed CPU time, checksum) and
  ## appends the same row to `msg`.
  const genDataName = astToStr(genData)
  # `factor` scales the outer iteration count per data set; unknown data set
  # names are rejected at compile time.
  when genDataName == "genData1": (let factor = 10)
  elif genDataName == "genData2": (let factor = 1)
  else: static: doAssert false, genDataName

  let a = genData()
  const name = astToStr(algo)
  # Build whatever `algo` expects as its first argument, selected by the
  # name of the algorithm; unknown names are rejected at compile time.
  when name == "lookup":
    let tab = initLookupTable(a)
  elif name == "lookupNaive":
    template tab: untyped = a
  elif name == "lookupTables":
    var tab: Table[int, int]
    for i, ai in a:
      tab[ai] = i
  else: static: doAssert false, name
  let t = cpuTime()
  # Running sum of every value returned by `algo`; reported in the row.
  # NOTE(review): presumably also keeps the calls from being optimized away
  # and lets the algorithms be cross-checked against each other - confirm.
  var c = 0
  let n = numIter * factor
  for i in 0..<n:
    for ai in a:
      c += algo(tab, ai)
  let t2 = cpuTime()-t
  let msgi = "$#\t$#\t$#\t$#" % [name, genDataName, $t2, $c]
  echo msgi # show intermediate progress
  msg.add msgi & "\n"

template main2(genData) =
  ## Runs the three lookup strategies, in this order, on the data set
  ## produced by `genData`.
  mainAux(genData, lookup)
  mainAux(genData, lookupTables)
  mainAux(genData, lookupNaive)

proc main() =
  ## Benchmarks both data sets, then prints every collected row as an
  ## aligned table below a `---` separator.
  main2(genData1)
  main2(genData2)
  echo "---\n", alignTable(msg)
main()

0 comments on commit ada3678

Please sign in to comment.