-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add efficient std/private/lookuptables
- Loading branch information
1 parent
53935b8
commit ada3678
Showing
5 changed files
with
181 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#[ | ||
Experimental API, subject to change. | ||
## benchmark | ||
see tests/benchmarks/tlookuptables.nim | ||
## design goals | ||
* low level module with few/no dependencies, which can be used in other modules; | ||
this precludes importing math, tables, hashes. | ||
* high performance (faster than `std/tables`) | ||
* avoid the complexity of tables.nim + friends (but could serve as building block for it) | ||
]# | ||
|
||
func nextPowerOfTwo*(x: int): int {.inline.} =
  ## Rounds `x` up to a power of two; inputs `<= 0` give `1`.
  ## Documented (for now) under `math.nextPowerOfTwo`.
  # Copy the highest set bit of `x - 1` into every lower bit position;
  # the shift widths that are applied depend on the size of `int`.
  var smeared = x - 1
  when defined(cpu64):
    smeared = smeared or (smeared shr 32)
  when sizeof(int) > 2:
    smeared = smeared or (smeared shr 16)
  when sizeof(int) > 1:
    smeared = smeared or (smeared shr 8)
  smeared = smeared or (smeared shr 4)
  smeared = smeared or (smeared shr 2)
  smeared = smeared or (smeared shr 1)
  # The `ord(x <= 0)` term adds one more for non-positive input, so that
  # e.g. `nextPowerOfTwo(0)` is `1` rather than `0`.
  smeared + (1 + ord(x <= 0))
|
||
type
  SimpleHash* = uint
    # math works out better with `uint` than with
    # `int` as done in `hashes.Hash`
  LookupTable*[T] = object
    ## Open-addressing table that maps each key to its position in the
    ## `openArray` the table was built from.
    cells*: seq[SimpleHash] ## `0` = empty cell, else 1-based key position
    keys*: seq[T] ## `keys[i]` is the key held by cell `i` (if non-empty)

# Multiplier of the probe recurrence in `nextCell`. It must stay congruent
# to 1 modulo 4: together with the odd increment this makes the recurrence
# visit every cell of a power-of-two sized table before repeating.
# Within that constraint it could be chosen to minimize the expected number
# of calls to `nextCell`, if the key distribution is known.
const pseudoRandomMixing = 5

template nextCell(index, m): untyped =
  ## Pseudo-random probing: the cell to try after `index`, for a table
  ## whose size is `m + 1` (a power of two).
  ##
  ## The step is derived from the current cell *index*, not from the value
  ## stored in the cell: a value-derived step can cycle among occupied
  ## cells forever, whereas this recurrence has full period and therefore
  ## always reaches an empty cell when one exists.
  (index * pseudoRandomMixing + 1) and m

template simpleHash[T](a: T): SimpleHash =
  ## Reinterprets the bits of `a` as a `SimpleHash`; keys that compare
  ## equal but differ in bit pattern are hashed differently.
  cast[SimpleHash](a)

proc initLookupTable*[T](a: openArray[T]): LookupTable[T] =
  ## Returns a lookup table that supports efficient lookup of the
  ## elements of `a` via `lookup`.
  ##
  ## The number of cells is a power of two, at least 2 and strictly
  ## greater than `a.len`, so at least one cell always stays empty and
  ## probing terminates. If `a` contains duplicates, `lookup` reports the
  ## position of the first occurrence.
  let size = max(2, nextPowerOfTwo(a.len * 3 div 2))
  result.cells.setLen size
  result.keys.setLen size
  let m = SimpleHash(size - 1)
  var i = 1'u # 1-based position of the current element; 0 means "empty"
  for ai in a:
    var index = simpleHash(ai) and m
    # skip occupied cells until a free one is found
    while result.cells[index] != 0:
      index = nextCell(index, m)
    result.cells[index] = i
    result.keys[index] = ai
    inc(i)

proc lookup*[T](tab: LookupTable[T], key: T): int =
  ## return `-1` if `key` not found, else an index `i`
  ## at which we can find `key`.
  ##
  ## A default-initialized table (no cells) contains nothing, so every
  ## lookup in it returns `-1`.
  runnableExamples:
    let a = @[100.0, 0.0, 13.3, -3.12]
    let b = a.initLookupTable
    assert b.lookup(13.3) == 2 # found at index 2
    assert b.lookup(0.3) == -1 # not found
  let size = tab.cells.len
  if size == 0:
    # table was never initialized: nothing to probe
    return -1
  let m = SimpleHash(size - 1)
  var index = simpleHash(key) and m
  while true:
    let h = tab.cells[index]
    if h == 0:
      # reached an empty cell: `key` was never inserted
      return -1
    if tab.keys[index] == key:
      # stored positions are 1-based
      return cast[int](h - 1)
    index = nextCell(index, m)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
discard """ | ||
joinable: false | ||
""" | ||
|
||
#[ | ||
nim r -d:numIter:10000000 -d:danger tests/benchmarks/tlookuptables.nim | ||
on OSX: | ||
lookup genData1 1.111774 2100000000 | ||
lookupTables genData1 2.469191 2100000000 | ||
lookupNaive genData1 1.580014 2100000000 | ||
lookup genData2 0.5989679999999993 12250000000 | ||
lookupTables genData2 1.704296 12250000000 | ||
lookupNaive genData2 4.306558000000001 12250000000 | ||
]# | ||
|
||
import std/[times, tables, strutils] | ||
import std/private/lookuptables | ||
import std/private/asciitables | ||
|
||
const numIter {.intDefine.} = 100 | ||
|
||
proc lookupTables[T](a: Table[T, int], key: T): int =
  ## Baseline using `std/tables`: the value stored under `key` in `a`.
  result = a[key]
|
||
proc lookupNaive[T](a: seq[T], key: T): int =
  ## Baseline linear scan: position of the first element of `a` equal to
  ## `key`, or `-1` when there is none.
  result = -1
  for pos in 0 ..< a.len:
    if a[pos] == key:
      return pos
|
||
proc genData1(): seq[int] =
  ## Small fixed data set, used to check performance on small data.
  @[100, 13, 15, 12, 0, -3, 44]
|
||
proc genData2(): seq[int] =
  ## Data set of size 50: the multiples of 37 from `0` to `49 * 37`.
  result = newSeqOfCap[int](50)
  var value = 0
  for step in 1 .. 50:
    result.add value
    value += 37
|
||
# Collects one tab-separated result row per benchmark run.
var msg = ""

template mainAux(genData, algo) =
  ## Times `algo` over the data returned by `genData()`, echoes the result
  ## row and appends it to `msg`.
  const
    genDataName = astToStr(genData)
    name = astToStr(algo)

  # The small data set is iterated 10x more often than the large one.
  when genDataName == "genData1":
    let factor = 10
  elif genDataName == "genData2":
    let factor = 1
  else:
    static: doAssert false, genDataName

  let a = genData()

  # Build the container `algo` expects as its first argument.
  when name == "lookup":
    let tab = initLookupTable(a)
  elif name == "lookupNaive":
    template tab: untyped = a
  elif name == "lookupTables":
    var tab: Table[int, int]
    for pos, item in a:
      tab[item] = pos
  else:
    static: doAssert false, name

  let started = cpuTime()
  var checksum = 0
  let rounds = numIter * factor
  for rep in 0 ..< rounds:
    for item in a:
      # summing the results keeps the lookups observable
      checksum += algo(tab, item)
  let elapsed = cpuTime() - started
  let row = "$#\t$#\t$#\t$#" % [name, genDataName, $elapsed, $checksum]
  echo row # show intermediate progress
  msg.add row & "\n"
|
||
template main2(genData) =
  ## Runs every benchmarked algorithm once on the data from `genData`.
  mainAux genData, lookup
  mainAux genData, lookupTables
  mainAux genData, lookupNaive
|
||
proc main() =
  ## Runs the benchmarks on both data sets, then prints all collected
  ## rows as an aligned table.
  main2(genData1)
  main2(genData2)
  let report = alignTable(msg)
  echo "---\n" & report

main()