Skip to content

Commit ada3678

Browse files
committed
add efficient std/private/lookuptables
1 parent 53935b8 commit ada3678

File tree

5 files changed

+181
-53
lines changed

5 files changed

+181
-53
lines changed

changelog.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,13 @@
139139

140140
- Added `std/enumutils` module. Added `genEnumCaseStmt` macro that generates case statement to parse string to enum.
141141
Added `items` for enums with holes.
142-
Added `symbolName` to return the enum symbol name ignoring the human readable name.
143-
Added `symbolRank` to return the index in which an enum member is listed in an enum.
142+
Added `symbolName` to return the enum symbol name ignoring the human readable name (`O(1)` cost).
143+
Added `symbolRank` to return the index in which an enum member is listed in an enum (`O(1)` cost).
144144

145145
- Added `typetraits.HoleyEnum` for enums with holes, `OrdinalEnum` for enums without holes.
146146

147+
- Added a (private for now) `std/private/lookuptables` for writing efficient lookup tables.
148+
147149
- Removed deprecated `iup` module from stdlib, it has already moved to
148150
[nimble](https://github.com/nim-lang/iup).
149151

lib/pure/math.nim

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,8 @@ func isPowerOfTwo*(x: int): bool =
303303

304304
return (x > 0) and ((x and (x - 1)) == 0)
305305

306+
from std/private/lookuptables import nil
307+
306308
func nextPowerOfTwo*(x: int): int =
307309
## Returns `x` rounded up to the nearest power of two.
308310
##
@@ -315,18 +317,7 @@ func nextPowerOfTwo*(x: int): int =
315317
doAssert nextPowerOfTwo(5) == 8
316318
doAssert nextPowerOfTwo(0) == 1
317319
doAssert nextPowerOfTwo(-16) == 1
318-
319-
result = x - 1
320-
when defined(cpu64):
321-
result = result or (result shr 32)
322-
when sizeof(int) > 2:
323-
result = result or (result shr 16)
324-
when sizeof(int) > 1:
325-
result = result or (result shr 8)
326-
result = result or (result shr 4)
327-
result = result or (result shr 2)
328-
result = result or (result shr 1)
329-
result += 1 + ord(x <= 0)
320+
lookuptables.nextPowerOfTwo(x)
330321

331322
func sum*[T](x: openArray[T]): T =
332323
## Computes the sum of the elements in `x`.

lib/std/enumutils.nim

Lines changed: 15 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -86,48 +86,26 @@ iterator items*[T: HoleyEnum](E: typedesc[T]): T =
8686
assert B[float].toSeq == [B[float].b0, B[float].b1]
8787
for a in enumFullRange(E): yield a
8888

89-
func span(T: typedesc[HoleyEnum]): int =
90-
(T.high.ord - T.low.ord) + 1
91-
92-
const invalidSlot = uint8.high
93-
94-
proc genLookup[T: typedesc[HoleyEnum]](_: T): auto =
95-
const n = span(T)
96-
var ret: array[n, uint8]
97-
var i = 0
98-
assert n <= invalidSlot.int
99-
for ai in mitems(ret): ai = invalidSlot
100-
for ai in items(T):
101-
ret[ai.ord - T.low.ord] = uint8(i)
102-
inc(i)
103-
return ret
89+
import std/private/lookuptables
10490

10591
func symbolRankImpl[T](a: T): int {.inline.} =
106-
const n = T.span
107-
const thres = 255 # must be <= `invalidSlot`, but this should be tuned.
108-
when n <= thres:
109-
const lookup = genLookup(T)
110-
let lookup2 {.global.} = lookup # xxx improve pending https://github.com/timotheecour/Nim/issues/553
111-
#[
112-
This could be optimized using a hash adapted to `T` (possible since it's known at CT)
113-
to get better key distribution before indexing into the lookup table table.
114-
]#
115-
{.noSideEffect.}: # because it's immutable
116-
let ret = lookup2[ord(a) - T.low.ord]
117-
if ret != invalidSlot: return ret.int
118-
else:
119-
var i = 0
120-
# we could also generate a case statement as optimization
121-
for ai in items(T):
122-
if ai == a: return i
123-
inc(i)
92+
const lut = (proc(): auto = # pending https://github.com/nim-lang/RFCs/issues/276
93+
var a: seq[int]
94+
for ai in items(T): a.add ai.ord
95+
a.initLookupTable)()
96+
let lut2 {.global.} = lut
97+
# xxx improve pending https://github.com/timotheecour/Nim/issues/553
98+
{.noSideEffect.}: # because it's immutable
99+
let ret = lut2.lookup(a.ord)
100+
if ret != -1: return ret
124101
raise newException(IndexDefect, $ord(a) & " invalid for " & $T)
125102

126103
template symbolRank*[T: enum](a: T): int =
127-
## Returns the index in which `a` is listed in `T`.
128-
##
129-
## The cost for a `HoleyEnum` is implementation defined, currently optimized
130-
## for small enums, otherwise is `O(T.enumLen)`.
104+
## Returns the index in which `a` is listed in `T` with `O(1)` cost even
105+
## for `HoleyEnum`.
106+
# The `O(1)` cost for `HoleyEnum` should hold except for adversarially designed
107+
# holey enums; if that becomes a problem we could fix it by optimizing over
108+
# `lookuptables.pseudoRandomMixing` since `T` is known at CT.
131109
runnableExamples:
132110
type
133111
A = enum a0 = -3, a1 = 10, a2, a3 = (20, "f3Alt") # HoleyEnum
@@ -145,8 +123,6 @@ template symbolRank*[T: enum](a: T): int =
145123

146124
func symbolName*[T: enum](a: T): string =
147125
## Returns the symbol name of an enum.
148-
##
149-
## This uses `symbolRank`.
150126
runnableExamples:
151127
type B = enum
152128
b0 = (10, "kb0")

lib/std/private/lookuptables.nim

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#[
2+
Experimental API, subject to change.
3+
4+
## benchmark
5+
see tests/benchmarks/tlookuptables.nim
6+
7+
## design goals
8+
* low level module with few/no dependencies, which can be used in other modules;
9+
this precludes importing math, tables, hashes.
10+
* high performance (faster than `std/tables`)
11+
* avoid the complexity of tables.nim + friends (but could serve as building block for it)
12+
]#
13+
14+
func nextPowerOfTwo*(x: int): int {.inline.} =
  ## Returns `x` rounded up to the nearest power of two; `x <= 0` gives `1`.
  ##
  ## The user-facing documentation lives (for now) on `math.nextPowerOfTwo`,
  ## which forwards to this implementation.
  # Propagate the highest set bit of `x - 1` into every lower bit position;
  # the shifts that are applied depend on the width of `int`.
  var bits = x - 1
  when defined(cpu64):
    bits = bits or (bits shr 32)
  when sizeof(int) > 2:
    bits = bits or (bits shr 16)
  when sizeof(int) > 1:
    bits = bits or (bits shr 8)
  bits = bits or (bits shr 4)
  bits = bits or (bits shr 2)
  bits = bits or (bits shr 1)
  # One more than the smeared value is the power of two; non-positive inputs
  # receive an additional `1` through `ord(x <= 0)`.
  result = bits + (1 + ord(x <= 0))
27+
28+
type
  SimpleHash* = uint
    # math works out better with `uint` than with
    # `int` as done in `hashes.Hash`
  LookupTable*[T] = object
    ## Open-addressing table that maps each key to its position in the
    ## sequence it was built from (see `initLookupTable`).
    cells*: seq[SimpleHash] ## `0` for an empty cell, else `1 + position`
    keys*: seq[T] ## key stored in the cell that has the same index

const pseudoRandomMixing = 5
  # Multiplier of the probing recurrence used by `nextCell`.
  # It must stay congruent to 1 modulo 4: together with the odd increment this
  # makes the recurrence visit every cell of a power-of-two sized table before
  # repeating (Hull-Dobell theorem), which is what guarantees termination.
  # Within that constraint it could be chosen to minimize the expected number
  # of calls to `nextCell`, if the key distribution is known.

template nextCell(index, m): untyped =
  ## Pseudo-random probing: returns the cell to try after `index`, for a
  ## table whose size is `m + 1` (a power of two).
  ##
  ## The step is a function of the current cell *index* only. Deriving it from
  ## the value stored in the cell can produce cycles among occupied cells
  ## (e.g. keys `5` and `13` in a 4-cell table) and loop forever.
  (index * SimpleHash(pseudoRandomMixing) + 1'u) and m

template simpleHash[T](a: T): SimpleHash =
  ## Reinterprets the bits of `a` as a `SimpleHash`; no mixing is applied.
  cast[SimpleHash](a)

proc initLookupTable*[T](a: openArray[T]): LookupTable[T] =
  ## Returns a lookup table that supports efficient lookup of the
  ## position of each element of `a` (see `lookup`).
  ##
  ## If an element is listed several times, `lookup` reports its first
  ## position.
  # The size is a power of two (so that `and m` can replace `mod`) and is
  # always strictly greater than `a.len`: at least one cell stays empty.
  let size = max(2, nextPowerOfTwo(a.len * 3 div 2))
  result.cells.setLen size
  result.keys.setLen size
  let m = SimpleHash(size - 1)
  for i, ai in a:
    var index = ai.simpleHash and m
    # Terminates: an empty cell exists and the probe sequence reaches it.
    while result.cells[index] != 0:
      index = nextCell(index, m)
    result.cells[index] = SimpleHash(i) + 1 # shifted by one: `0` means empty
    result.keys[index] = ai

proc lookup*[T](tab: LookupTable[T], key: T): int =
  ## Returns `-1` if `key` is not found, else the index `i` at which `key`
  ## can be found in the sequence passed to `initLookupTable`.
  ##
  ## A default-initialized table contains no key, so `-1` is returned.
  runnableExamples:
    let a = @[100.0, 0.0, 13.3, -3.12]
    let b = a.initLookupTable
    assert b.lookup(13.3) == 2 # found at index 2
    assert b.lookup(0.3) == -1 # not found
  if tab.cells.len == 0:
    return -1 # never initialized: avoids computing a mask from size 0
  let m = SimpleHash(tab.cells.len - 1)
  var index = key.simpleHash and m
  while true:
    let h = tab.cells[index]
    if h == 0:
      return -1 # reached an empty cell: `key` was never inserted
    if tab.keys[index] == key:
      return int(h - 1) # undo the shift applied on insertion
    index = nextCell(index, m)

tests/benchmarks/tlookuptables.nim

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
discard """
2+
joinable: false
3+
"""
4+
5+
#[
6+
nim r -d:numIter:10000000 -d:danger tests/benchmarks/tlookuptables.nim
7+
on OSX:
8+
lookup genData1 1.111774 2100000000
9+
lookupTables genData1 2.469191 2100000000
10+
lookupNaive genData1 1.580014 2100000000
11+
lookup genData2 0.5989679999999993 12250000000
12+
lookupTables genData2 1.704296 12250000000
13+
lookupNaive genData2 4.306558000000001 12250000000
14+
]#
15+
16+
import std/[times, tables, strutils]
17+
import std/private/lookuptables
18+
import std/private/asciitables
19+
20+
const numIter {.intDefine.} = 100
21+
22+
proc lookupTables[T](a: Table[T, int], key: T): int =
  ## Baseline using `std/tables`: returns the value stored under `key` in `a`.
  result = a[key]
24+
25+
proc lookupNaive[T](a: seq[T], key: T): int =
  ## Baseline using a linear scan: returns the first index at which `key`
  ## occurs in `a`, or `-1` when it does not occur.
  result = -1
  for pos in 0 ..< a.len:
    if a[pos] == key:
      return pos
29+
30+
proc genData1(): seq[int] =
  ## Small data set (7 keys), used to check performance on small data.
  @[100, 13, 15, 12, 0, -3, 44]
33+
34+
proc genData2(): seq[int] =
  ## Data set of size 50: the multiples of 37 from `0` to `49 * 37`.
  for value in countup(0, 49 * 37, 37):
    result.add value
38+
39+
var msg = ""
40+
41+
template mainAux(genData, algo) =
  ## Times `algo` on the data produced by `genData`, echoes one tab-separated
  ## result line (algorithm, data set, elapsed CPU time, checksum) and
  ## appends that line to `msg`.
  ##
  ## Both arguments are dispatched on by name at compile time; an unknown
  ## name is rejected with a compile-time assertion.
  const
    dataName = astToStr(genData)
    algoName = astToStr(algo)

  # The small data set gets 10x more iterations than the large one.
  when dataName == "genData1":
    let factor = 10
  elif dataName == "genData2":
    let factor = 1
  else:
    static: doAssert false, dataName

  let data = genData()

  # Build the container each algorithm searches in, outside the timed region.
  when algoName == "lookup":
    let tab = initLookupTable(data)
  elif algoName == "lookupNaive":
    template tab: untyped = data
  elif algoName == "lookupTables":
    var tab: Table[int, int]
    for pos, key in data:
      tab[key] = pos
  else:
    static: doAssert false, algoName

  let started = cpuTime()
  var checksum = 0 # sum of all lookup results, reported with the timing
  let rounds = numIter * factor
  for round in 0 ..< rounds:
    for key in data:
      checksum += algo(tab, key)
  let elapsed = cpuTime() - started

  let line = "$#\t$#\t$#\t$#" % [algoName, dataName, $elapsed, $checksum]
  echo line # show intermediate progress
  msg.add line & "\n"
68+
69+
template main2(genData) =
  ## Runs `mainAux` on the data set `genData` once per lookup strategy, in
  ## the order `lookup`, `lookupTables`, `lookupNaive`.
  mainAux(genData, lookup)
  mainAux(genData, lookupTables)
  mainAux(genData, lookupNaive)
73+
74+
proc main() =
  ## Benchmarks both data sets, then echoes the collected result lines after
  ## passing them through `alignTable`.
  main2(genData1)
  main2(genData2)
  let summary = msg.alignTable
  echo "---\n" & summary
78+
main()

0 commit comments

Comments
 (0)