Skip to content

Commit e47ca7a

Browse files
maltheaardappel
authored and committed
Use a hash table to index existing vtables (google#5314)
* Use a hash table to index existing vtables. This allows for quick deduplication even in situations where there might be thousands of vtables due to 'combinatoric explosion'. This fixes issue google#5301.
* Refactor 0-offset trimming.
* Improve deduplication benchmark. The routine now generates a set of realistic logical layouts and uses a timer function that randomly picks a layout for each iteration. The benchmark runs in batches of # of logical layouts = 1, 10, 100, 1000. (Note that due to alignment, the actual number of vtables is usually slightly higher.)
1 parent d79f4e9 commit e47ca7a

File tree

2 files changed

+68
-61
lines changed

2 files changed

+68
-61
lines changed

python/flatbuffers/builder.py

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ class Builder(object):
9494
It holds the following internal state:
9595
- Bytes: an array of bytes.
9696
- current_vtable: a list of integers.
97-
- vtables: a list of vtable entries (i.e. a list of list of integers).
97+
- vtables: a hash of vtable entries.
9898
9999
Attributes:
100100
Bytes: The internal `bytearray` for the Builder.
@@ -129,7 +129,7 @@ def __init__(self, initialSize):
129129
self.head = UOffsetTFlags.py_type(initialSize)
130130
self.minalign = 1
131131
self.objectEnd = None
132-
self.vtables = []
132+
self.vtables = {}
133133
self.nested = False
134134
## @endcond
135135
self.finished = False
@@ -191,52 +191,45 @@ def WriteVtable(self):
191191
self.PrependSOffsetTRelative(0)
192192

193193
objectOffset = self.Offset()
194-
existingVtable = None
195-
196-
# Trim trailing 0 offsets.
197-
while self.current_vtable and self.current_vtable[-1] == 0:
198-
self.current_vtable.pop()
199-
200-
# Search backwards through existing vtables, because similar vtables
201-
# are likely to have been recently appended. See
202-
# BenchmarkVtableDeduplication for a case in which this heuristic
203-
# saves about 30% of the time used in writing objects with duplicate
204-
# tables.
205-
206-
i = len(self.vtables) - 1
207-
while i >= 0:
208-
# Find the other vtable, which is associated with `i`:
209-
vt2Offset = self.vtables[i]
210-
vt2Start = len(self.Bytes) - vt2Offset
211-
vt2Len = encode.Get(packer.voffset, self.Bytes, vt2Start)
212-
213-
metadata = VtableMetadataFields * N.VOffsetTFlags.bytewidth
214-
vt2End = vt2Start + vt2Len
215-
vt2 = self.Bytes[vt2Start+metadata:vt2End]
216-
217-
# Compare the other vtable to the one under consideration.
218-
# If they are equal, store the offset and break:
219-
if vtableEqual(self.current_vtable, objectOffset, vt2):
220-
existingVtable = vt2Offset
221-
break
222-
223-
i -= 1
224-
225-
if existingVtable is None:
194+
195+
vtKey = []
196+
trim = True
197+
for elem in reversed(self.current_vtable):
198+
if elem == 0:
199+
if trim:
200+
continue
201+
else:
202+
elem = objectOffset - elem
203+
trim = False
204+
205+
vtKey.append(elem)
206+
207+
vtKey = tuple(vtKey)
208+
vt2Offset = self.vtables.get(vtKey)
209+
if vt2Offset is None:
226210
# Did not find a vtable, so write this one to the buffer.
227211

228212
# Write out the current vtable in reverse, because
229213
# serialization occurs in last-first order:
230214
i = len(self.current_vtable) - 1
215+
trailing = 0
216+
trim = True
231217
while i >= 0:
232218
off = 0
233-
if self.current_vtable[i] != 0:
219+
elem = self.current_vtable[i]
220+
i -= 1
221+
222+
if elem == 0:
223+
if trim:
224+
trailing += 1
225+
continue
226+
else:
234227
# Forward reference to field;
235228
# use 32bit number to ensure no overflow:
236-
off = objectOffset - self.current_vtable[i]
229+
off = objectOffset - elem
230+
trim = False
237231

238232
self.PrependVOffsetT(off)
239-
i -= 1
240233

241234
# The two metadata fields are written last.
242235

@@ -245,7 +238,7 @@ def WriteVtable(self):
245238
self.PrependVOffsetT(VOffsetTFlags.py_type(objectSize))
246239

247240
# Second, store the vtable bytesize:
248-
vBytes = len(self.current_vtable) + VtableMetadataFields
241+
vBytes = len(self.current_vtable) - trailing + VtableMetadataFields
249242
vBytes *= N.VOffsetTFlags.bytewidth
250243
self.PrependVOffsetT(VOffsetTFlags.py_type(vBytes))
251244

@@ -257,17 +250,16 @@ def WriteVtable(self):
257250

258251
# Finally, store this vtable in memory for future
259252
# deduplication:
260-
self.vtables.append(self.Offset())
253+
self.vtables[vtKey] = self.Offset()
261254
else:
262255
# Found a duplicate vtable.
263-
264256
objectStart = SOffsetTFlags.py_type(len(self.Bytes) - objectOffset)
265257
self.head = UOffsetTFlags.py_type(objectStart)
266258

267259
# Write the offset to the found vtable in the
268260
# already-allocated SOffsetT at the beginning of this object:
269261
encode.Write(packer.soffset, self.Bytes, self.Head(),
270-
SOffsetTFlags.py_type(existingVtable - objectOffset))
262+
SOffsetTFlags.py_type(vt2Offset - objectOffset))
271263

272264
self.current_vtable = None
273265
return objectOffset

tests/py_test.py

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import ctypes
2222
from collections import defaultdict
2323
import math
24+
import random
2425
import timeit
2526
import unittest
2627

@@ -1617,26 +1618,40 @@ def BenchmarkVtableDeduplication(count):
16171618
When count is large (as in long benchmarks), memory usage may be high.
16181619
'''
16191620

1620-
prePop = 10
1621-
builder = flatbuffers.Builder(0)
1622-
1623-
# pre-populate some vtables:
1624-
for i in compat_range(prePop):
1625-
builder.StartObject(i)
1626-
for j in compat_range(i):
1627-
builder.PrependInt16Slot(j, j, 0)
1628-
builder.EndObject()
1629-
1630-
# benchmark deduplication of a new vtable:
1631-
def f():
1632-
builder.StartObject(prePop)
1633-
for j in compat_range(prePop):
1634-
builder.PrependInt16Slot(j, j, 0)
1635-
builder.EndObject()
1636-
1637-
duration = timeit.timeit(stmt=f, number=count)
1638-
rate = float(count) / duration
1639-
print(('vtable deduplication rate: %.2f/sec' % rate))
1621+
for prePop in (1, 10, 100, 1000):
1622+
builder = flatbuffers.Builder(0)
1623+
n = 1 + int(math.log(prePop, 1.5))
1624+
1625+
# generate some layouts:
1626+
layouts = set()
1627+
r = list(compat_range(n))
1628+
while len(layouts) < prePop:
1629+
layouts.add(tuple(sorted(random.sample(r, int(max(1, n / 2))))))
1630+
1631+
layouts = list(layouts)
1632+
1633+
# pre-populate vtables:
1634+
for layout in layouts:
1635+
builder.StartObject(n)
1636+
for j in layout:
1637+
builder.PrependInt16Slot(j, j, 0)
1638+
builder.EndObject()
1639+
1640+
# benchmark deduplication of a new vtable:
1641+
def f():
1642+
layout = random.choice(layouts)
1643+
builder.StartObject(n)
1644+
for j in layout:
1645+
builder.PrependInt16Slot(j, j, 0)
1646+
builder.EndObject()
1647+
1648+
duration = timeit.timeit(stmt=f, number=count)
1649+
rate = float(count) / duration
1650+
print(('vtable deduplication rate (n=%d, vtables=%d): %.2f sec' % (
1651+
prePop,
1652+
len(builder.vtables),
1653+
rate))
1654+
)
16401655

16411656

16421657
def BenchmarkCheckReadBuffer(count, buf, off):

0 commit comments

Comments
 (0)