Speed up parsing notation LLSD #1


Merged: 11 commits, Nov 17, 2022
124 changes: 69 additions & 55 deletions llsd/base.py
@@ -321,19 +321,6 @@ def _to_python(node):
     return NODE_HANDLERS[node.tag](node)
 
 
-def _hex_as_nybble(hex):
-    "Accepts a single hex character and returns a nybble."
-    if (hex >= b'0') and (hex <= b'9'):
-        return ord(hex) - ord(b'0')
-    elif (hex >= b'a') and (hex <= b'f'):
-        return 10 + ord(hex) - ord(b'a')
-    elif (hex >= b'A') and (hex <= b'F'):
-        return 10 + ord(hex) - ord(b'A')
-    else:
-        raise LLSDParseError('Invalid hex character: %s' % hex)
-
-
-
 class LLSDBaseFormatter(object):
     """
     This base class cannot be instantiated on its own: it assumes a subclass
@@ -366,13 +353,22 @@ def __init__(self):
         }
 
 
+_X_ORD = ord(b'x')
+_BACKSLASH_ORD = ord(b'\\')
+_DECODE_BUFF_ALLOC_SIZE = 1024
+
+
 class LLSDBaseParser(object):
     """
     Utility methods useful for parser subclasses.
     """
+    __slots__ = ['_buffer', '_index', '_decode_buff']
+
     def __init__(self):
         self._buffer = b''
         self._index = 0
+        # Scratch space for decoding delimited strings
+        self._decode_buff = bytearray(_DECODE_BUFF_ALLOC_SIZE)
 
     def _error(self, message, offset=0):
         try:
@@ -399,53 +395,71 @@ def _getc(self, num=1):
 
     # map char following escape char to corresponding character
     _escaped = {
-        b'a': b'\a',
-        b'b': b'\b',
-        b'f': b'\f',
-        b'n': b'\n',
-        b'r': b'\r',
-        b't': b'\t',
-        b'v': b'\v',
+        ord(b'a'): ord(b'\a'),
+        ord(b'b'): ord(b'\b'),
+        ord(b'f'): ord(b'\f'),
+        ord(b'n'): ord(b'\n'),
+        ord(b'r'): ord(b'\r'),
+        ord(b't'): ord(b'\t'),
+        ord(b'v'): ord(b'\v'),
     }
 
     def _parse_string_delim(self, delim):
         "Parse a delimited string."
-        parts = bytearray()
-        found_escape = False
-        found_hex = False
-        found_digit = False
-        byte = 0
+        insert_idx = 0
+        delim_ord = ord(delim)
+        # Preallocate a working buffer for the decoded string output
+        # to avoid allocs in the hot loop.
+        decode_buff = self._decode_buff
+        # Cache these in locals, otherwise we have to perform a lookup on
+        # `self` in the hot loop.
+        buff = self._buffer
+        read_idx = self._index
+        cc = 0
         while True:
-            cc = self._getc()
-            if found_escape:
-                if found_hex:
-                    if found_digit:
-                        found_escape = False
-                        found_hex = False
-                        found_digit = False
-                        byte <<= 4
-                        byte |= _hex_as_nybble(cc)
-                        parts.append(byte)
-                        byte = 0
-                    else:
-                        found_digit = True
-                        byte = _hex_as_nybble(cc)
-                elif cc == b'x':
-                    found_hex = True
-                else:
-                    found_escape = False
-                    # escape char preceding anything other than the chars in
-                    # _escaped just results in that same char without the
-                    # escape char
-                    parts.extend(self._escaped.get(cc, cc))
-            elif cc == b'\\':
-                found_escape = True
-            elif cc == delim:
-                break
-            else:
-                parts.extend(cc)
+            try:
+                cc = buff[read_idx]
+                read_idx += 1
+
+                if cc == _BACKSLASH_ORD:
+                    # Backslash, figure out if this is an \xNN hex escape or
+                    # something like \t
+                    cc = buff[read_idx]
+                    read_idx += 1
+                    if cc == _X_ORD:
+                        # It's a hex escape. char is the value of the two
+                        # following hex nybbles
+                        cc = int(chr(buff[read_idx]), 16) << 4
+                        read_idx += 1
+                        cc |= int(chr(buff[read_idx]), 16)
+                        read_idx += 1
+                    else:
+                        # escape char preceding anything other than the chars
+                        # in _escaped just results in that same char without
+                        # the escape char
+                        cc = self._escaped.get(cc, cc)
+                elif cc == delim_ord:
+                    break
+            except IndexError:
+                # We can be reasonably sure that any IndexErrors inside here
+                # were caused by an out-of-bounds `buff[read_idx]`.
+                self._index = read_idx
+                self._error("Trying to read past end of buffer")
+
+            decode_buff[insert_idx] = cc
+            insert_idx += 1
+
+            # We inserted a character, check if we need to expand the buffer.
+            if insert_idx % _DECODE_BUFF_ALLOC_SIZE == 0:
+                # Any additions may now overflow the decoding buffer, make
+                # a new expanded buffer containing the existing contents.
+                decode_buff = bytearray(decode_buff)
+                decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE)
Contributor:

Would it be faster still to do something like:

self._decode_buffs = [bytearray(_DECODE_BUFF_ALLOC_SIZE)]
# ...
buff_idx = 0
decode_buff = self._decode_buffs[buff_idx]
# ...
try:
    decode_buff[insert_idx] = cc
except IndexError:
    buff_idx += 1
    try:
        decode_buff = self._decode_buffs[buff_idx]
    except IndexError:
        decode_buff = bytearray(_DECODE_BUFF_ALLOC_SIZE)
        self._decode_buffs.append(decode_buff)
    insert_idx = 0
    decode_buff[insert_idx] = cc
insert_idx += 1
# ...
try:
    return b''.join(self._decode_buffs[:-1] + [decode_buff[:insert_idx]]).decode('utf-8')
except UnicodeDecodeError as exc:
    # ...

Contributor:

I guess it depends on how many oversized strings you expect in the input stream. The code you submitted leaves you with a single larger buffer, prepared to handle many oversized strings without further allocations. My suggestion above should expand faster the first time, but requires consolidating multiple bytearrays for each oversized string. Plus which it's admittedly more complex.

I still suggest the code you wrote would be improved by catching IndexError on decode_buff assignment, though, rather than testing insert_idx as shown. Also I suspect there's a bug in your test.

Say you're working on the second oversized string in the same input stream, so len(self._decode_buff) == 2*_DECODE_BUFF_ALLOC_SIZE. You just inserted the byte at (_DECODE_BUFF_ALLOC_SIZE - 1) and incremented insert_idx. Now, although insert_idx == len(decode_buff)/2, insert_idx % _DECODE_BUFF_ALLOC_SIZE in fact equals 0, so you extend decode_buff -- resulting in _DECODE_BUFF_ALLOC_SIZE bytes of garbage in the buffer.
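To make the spurious trigger concrete, a minimal sketch (hypothetical values; assumes the scratch buffer had already grown to twice the allocation size):

_DECODE_BUFF_ALLOC_SIZE = 1024

# Suppose a previous oversized string left a 2048-byte scratch buffer behind.
decode_buff = bytearray(2 * _DECODE_BUFF_ALLOC_SIZE)
insert_idx = _DECODE_BUFF_ALLOC_SIZE  # just wrote the byte at index 1023

# The modulo test fires even though 1024 free bytes remain...
assert insert_idx % _DECODE_BUFF_ALLOC_SIZE == 0
# ...while a test against the buffer's actual length would not:
assert insert_idx < len(decode_buff)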

Contributor:

> The code you submitted leaves you with a single larger buffer, prepared to handle many oversized strings without further allocations.

Whoops, not true: your expanded decode_buff isn't stored in self._decode_buff, so subsequent oversized strings would in fact require a new expanded decode_buff.
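For illustration, a sketch of what persisting the grown buffer would look like (a hypothetical variant, not what this PR does; see the author's rationale below):

# ...
decode_buff = bytearray(decode_buff)
decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE)
self._decode_buff = decode_buff  # keep the larger scratch buffer for later strings
# ...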

Contributor Author:

> Whoops, not true: your expanded decode_buff isn't stored in self._decode_buff, so subsequent oversized strings would in fact require a new expanded decode_buff.

Yep, my reasoning there was that I sometimes use long-lived LLSD*Parser objects, and I was concerned about keeping a large scratch buffer around just because I'd previously parsed a large string.

> except IndexError:

That is a good point! I forgot that in Python it's much less expensive to handle the (potential) IndexError than to check whether we're about to overflow. The internal IndexError check is priced in and you don't really get to opt out anyway. Doing an opportunistic assignment and doing my buffer copy in the except IndexError handler shaves off about 100ms! I think this strategy ends up being easier to read as well (I didn't feel great about the %).

The buffer copy itself isn't terribly expensive relative to juggling + concatenating multiple buffers though. Buffer concatenation was a tiny bit slower for payloads containing strings mostly under 1024 bytes.
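For reference, the shape of the opportunistic-assignment pattern described above (a sketch reusing the diff's names, not the exact merged code):

# ...
try:
    decode_buff[insert_idx] = cc
except IndexError:
    # The write ran off the end of the scratch buffer; grow it and retry,
    # instead of testing insert_idx on every pass through the loop.
    decode_buff = bytearray(decode_buff)
    decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE)
    decode_buff[insert_idx] = cc
insert_idx += 1
# ...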

         try:
-            return parts.decode('utf-8')
+            # Sync our local read index with the canonical one
+            self._index = read_idx
+            # Slice off only what we used of the working decode buffer
+            return decode_buff[:insert_idx].decode('utf-8')
         except UnicodeDecodeError as exc:
             self._error(exc)

@@ -457,4 +471,4 @@ def starts_with(startstr, something):
     pos = something.tell()
     s = something.read(len(startstr))
     something.seek(pos, os.SEEK_SET)
-    return (s == startstr)
+    return (s == startstr)
2 changes: 2 additions & 0 deletions llsd/serde_binary.py
@@ -13,6 +13,8 @@ class LLSDBinaryParser(LLSDBaseParser):
 
     See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
     """
+    __slots__ = ['_dispatch', '_keep_binary']
+
     def __init__(self):
         super(LLSDBinaryParser, self).__init__()
         # One way of dispatching based on the next character we see would be a
2 changes: 2 additions & 0 deletions llsd/serde_notation.py
@@ -328,6 +328,8 @@ class LLSDNotationFormatter(LLSDBaseFormatter):
 
     See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
     """
+    __slots__ = []
+
     def LLSD(self, v):
         return self._generate(v.thing)
     def UNDEF(self, v):
1 change: 1 addition & 0 deletions llsd/serde_xml.py
@@ -36,6 +36,7 @@ class LLSDXMLFormatter(LLSDBaseFormatter):
     module level format_xml is the most convenient interface to this
     functionality.
     """
+    __slots__ = []
 
     def _elt(self, name, contents=None):
         "Serialize a single element."
6 changes: 6 additions & 0 deletions tests/llsd_test.py
@@ -507,6 +507,12 @@ def testParseNotationIncorrectMIME(self):
         except llsd.LLSDParseError:
             pass
 
+    def testParseNotationUnterminatedString(self):
+        """
+        Test with an unterminated delimited string
+        """
+        self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo")
+
 
 class LLSDBinaryUnitTest(unittest.TestCase):
     """
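As a closing usage sketch, the escape handling this PR optimizes can be exercised end to end (assuming the package's top-level parse_notation() entry point):

import llsd

# A notation string containing a named escape (\t) and a hex escape
# (\x41, i.e. 'A'); both paths go through _parse_string_delim.
assert llsd.parse_notation(b"'tab:\\t hex:\\x41'") == 'tab:\t hex:A'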