Speed up parsing notation LLSD #1


Merged: 11 commits, Nov 17, 2022
124 changes: 69 additions & 55 deletions llsd/base.py
@@ -321,19 +321,6 @@ def _to_python(node):
     return NODE_HANDLERS[node.tag](node)
 
 
-def _hex_as_nybble(hex):
-    "Accepts a single hex character and returns a nybble."
-    if (hex >= b'0') and (hex <= b'9'):
-        return ord(hex) - ord(b'0')
-    elif (hex >= b'a') and (hex <= b'f'):
-        return 10 + ord(hex) - ord(b'a')
-    elif (hex >= b'A') and (hex <= b'F'):
-        return 10 + ord(hex) - ord(b'A')
-    else:
-        raise LLSDParseError('Invalid hex character: %s' % hex)
-
-
-
 class LLSDBaseFormatter(object):
     """
     This base class cannot be instantiated on its own: it assumes a subclass
@@ -366,13 +353,22 @@ def __init__(self):
         }
 
 
+_X_ORD = ord(b'x')
+_BACKSLASH_ORD = ord(b'\\')
+_DECODE_BUFF_ALLOC_SIZE = 1024
+
+
 class LLSDBaseParser(object):
     """
     Utility methods useful for parser subclasses.
     """
+    __slots__ = ['_buffer', '_index', '_decode_buff']
+
     def __init__(self):
         self._buffer = b''
         self._index = 0
+        # Scratch space for decoding delimited strings
+        self._decode_buff = bytearray(_DECODE_BUFF_ALLOC_SIZE)
 
     def _error(self, message, offset=0):
         try:
@@ -399,53 +395,71 @@ def _getc(self, num=1):
 
     # map char following escape char to corresponding character
     _escaped = {
-        b'a': b'\a',
-        b'b': b'\b',
-        b'f': b'\f',
-        b'n': b'\n',
-        b'r': b'\r',
-        b't': b'\t',
-        b'v': b'\v',
+        ord(b'a'): ord(b'\a'),
+        ord(b'b'): ord(b'\b'),
+        ord(b'f'): ord(b'\f'),
+        ord(b'n'): ord(b'\n'),
+        ord(b'r'): ord(b'\r'),
+        ord(b't'): ord(b'\t'),
+        ord(b'v'): ord(b'\v'),
     }
 
     def _parse_string_delim(self, delim):
         "Parse a delimited string."
-        parts = bytearray()
-        found_escape = False
-        found_hex = False
-        found_digit = False
-        byte = 0
+        insert_idx = 0
+        delim_ord = ord(delim)
+        # Preallocate a working buffer for the decoded string output
+        # to avoid allocs in the hot loop.
+        decode_buff = self._decode_buff
+        # Cache these in locals, otherwise we have to perform a lookup on
+        # `self` in the hot loop.
+        buff = self._buffer
+        read_idx = self._index
+        cc = 0
         while True:
-            cc = self._getc()
-            if found_escape:
-                if found_hex:
-                    if found_digit:
-                        found_escape = False
-                        found_hex = False
-                        found_digit = False
-                        byte <<= 4
-                        byte |= _hex_as_nybble(cc)
-                        parts.append(byte)
-                        byte = 0
-                    else:
-                        found_digit = True
-                        byte = _hex_as_nybble(cc)
-                elif cc == b'x':
-                    found_hex = True
-                else:
-                    found_escape = False
-                    # escape char preceding anything other than the chars in
-                    # _escaped just results in that same char without the
-                    # escape char
-                    parts.extend(self._escaped.get(cc, cc))
-            elif cc == b'\\':
-                found_escape = True
-            elif cc == delim:
-                break
-            else:
-                parts.extend(cc)
+            try:
+                cc = buff[read_idx]
+                read_idx += 1
+
+                if cc == _BACKSLASH_ORD:
+                    # Backslash, figure out if this is an \xNN hex escape or
+                    # something like \t
+                    cc = buff[read_idx]
+                    read_idx += 1
+                    if cc == _X_ORD:
+                        # It's a hex escape. char is the value of the two
+                        # following hex nybbles
+                        cc = int(chr(buff[read_idx]), 16) << 4
+                        read_idx += 1
+                        cc |= int(chr(buff[read_idx]), 16)
+                        read_idx += 1
+                    else:
+                        # escape char preceding anything other than the chars
+                        # in _escaped just results in that same char without
+                        # the escape char
+                        cc = self._escaped.get(cc, cc)
+                elif cc == delim_ord:
+                    break
+            except IndexError:
+                # We can be reasonably sure that any IndexErrors inside here
+                # were caused by an out-of-bounds `buff[read_idx]`.
+                self._index = read_idx
+                self._error("Trying to read past end of buffer")
+
+            decode_buff[insert_idx] = cc
+            insert_idx += 1
+
+            # We inserted a character, check if we need to expand the buffer.
+            if insert_idx % _DECODE_BUFF_ALLOC_SIZE == 0:
+                # Any additions may now overflow the decoding buffer, make
+                # a new expanded buffer containing the existing contents.
+                decode_buff = bytearray(decode_buff)
+                decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE)
Contributor:

Would it be faster still to do something like:

self._decode_buffs = [bytearray(_DECODE_BUFF_ALLOC_SIZE)]
# ...
buff_idx = 0
decode_buff = self._decode_buffs[buff_idx]
# ...
try:
    decode_buff[insert_idx] = cc
except IndexError:
    buff_idx += 1
    try:
        decode_buff = self._decode_buffs[buff_idx]
    except IndexError:
        decode_buff = bytearray(_DECODE_BUFF_ALLOC_SIZE)
        self._decode_buffs.append(decode_buff)
    insert_idx = 0
    decode_buff[insert_idx] = cc
insert_idx += 1
# ...
try:
    return b''.join(self._decode_buffs[:-1] + [decode_buff[:insert_idx]]).decode('utf-8')
except UnicodeDecodeError as exc:
    # ...

Contributor:

I guess it depends on how many oversized strings you expect in the input stream. The code you submitted leaves you with a single larger buffer, prepared to handle many oversized strings without further allocations. My suggestion above should expand faster the first time, but requires consolidating multiple bytearrays for each oversized string. Plus which it's admittedly more complex.

I still suggest the code you wrote would be improved by catching IndexError on decode_buff assignment, though, rather than testing insert_idx as shown. Also I suspect there's a bug in your test.

Say you're working on the second oversized string in the same input stream, so len(self._decode_buff) == 2*_DECODE_BUFF_ALLOC_SIZE. You just inserted the byte at (_DECODE_BUFF_ALLOC_SIZE - 1) and incremented insert_idx. Now, although insert_idx == len(decode_buff)/2, insert_idx % _DECODE_BUFF_ALLOC_SIZE in fact equals 0, so you extend decode_buff -- resulting in _DECODE_BUFF_ALLOC_SIZE bytes of garbage in the buffer.
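To make the spurious trigger concrete, a minimal sketch (hypothetical values; assumes the scratch buffer had already grown to twice the allocation size):

_DECODE_BUFF_ALLOC_SIZE = 1024

# Suppose a previous oversized string left a 2048-byte scratch buffer behind.
decode_buff = bytearray(2 * _DECODE_BUFF_ALLOC_SIZE)
insert_idx = _DECODE_BUFF_ALLOC_SIZE  # just wrote the byte at index 1023

# The modulo test fires even though 1024 free bytes remain...
assert insert_idx % _DECODE_BUFF_ALLOC_SIZE == 0
# ...while a test against the buffer's actual length would not:
assert insert_idx < len(decode_buff)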

Contributor:

> The code you submitted leaves you with a single larger buffer, prepared to handle many oversized strings without further allocations.

Whoops, not true: your expanded decode_buff isn't stored in self._decode_buff, so subsequent oversized strings would in fact require a new expanded decode_buff.
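For illustration, a sketch of what persisting the grown buffer would look like (a hypothetical variant, not what this PR does; see the author's rationale below):

# ...
decode_buff = bytearray(decode_buff)
decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE)
self._decode_buff = decode_buff  # keep the larger scratch buffer for later strings
# ...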

Contributor Author:

> Whoops, not true: your expanded decode_buff isn't stored in self._decode_buff, so subsequent oversized strings would in fact require a new expanded decode_buff.

Yep, my reasoning there was that I sometimes use long-lived LLSD*Parser objects, and I was concerned about keeping a large scratch buffer around just because I'd previously parsed a large string.

> except IndexError:

That is a good point! I forgot that in Python it's much less expensive to handle the (potential) IndexError than to check whether we're about to overflow. The internal IndexError check is priced in and you don't really get to opt out anyway. Doing an opportunistic assignment and doing my buffer copy in the except IndexError handler shaves off about 100ms! I think this strategy ends up being easier to read as well (I didn't feel great about the %).

The buffer copy itself isn't terribly expensive relative to juggling + concatenating multiple buffers though. Buffer concatenation was a tiny bit slower for payloads containing strings mostly under 1024 bytes.
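For reference, the shape of the opportunistic-assignment pattern described above (a sketch reusing the diff's names, not the exact merged code):

# ...
try:
    decode_buff[insert_idx] = cc
except IndexError:
    # The write ran off the end of the scratch buffer; grow it and retry,
    # instead of testing insert_idx on every pass through the loop.
    decode_buff = bytearray(decode_buff)
    decode_buff.extend(b"\x00" * _DECODE_BUFF_ALLOC_SIZE)
    decode_buff[insert_idx] = cc
insert_idx += 1
# ...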

         try:
-            return parts.decode('utf-8')
+            # Sync our local read index with the canonical one
+            self._index = read_idx
+            # Slice off only what we used of the working decode buffer
+            return decode_buff[:insert_idx].decode('utf-8')
         except UnicodeDecodeError as exc:
             self._error(exc)

@@ -457,4 +471,4 @@ def starts_with(startstr, something):
     pos = something.tell()
     s = something.read(len(startstr))
     something.seek(pos, os.SEEK_SET)
-    return (s == startstr)
+    return (s == startstr)
2 changes: 2 additions & 0 deletions llsd/serde_binary.py
@@ -13,6 +13,8 @@ class LLSDBinaryParser(LLSDBaseParser):
 
     See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
     """
+    __slots__ = ['_dispatch', '_keep_binary']
+
     def __init__(self):
         super(LLSDBinaryParser, self).__init__()
         # One way of dispatching based on the next character we see would be a
2 changes: 2 additions & 0 deletions llsd/serde_notation.py
@@ -328,6 +328,8 @@ class LLSDNotationFormatter(LLSDBaseFormatter):
 
     See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
     """
+    __slots__ = []
+
     def LLSD(self, v):
         return self._generate(v.thing)
     def UNDEF(self, v):
1 change: 1 addition & 0 deletions llsd/serde_xml.py
@@ -36,6 +36,7 @@ class LLSDXMLFormatter(LLSDBaseFormatter):
     module level format_xml is the most convenient interface to this
     functionality.
     """
+    __slots__ = []
 
     def _elt(self, name, contents=None):
         "Serialize a single element."
6 changes: 6 additions & 0 deletions tests/llsd_test.py
@@ -507,6 +507,12 @@ def testParseNotationIncorrectMIME(self):
         except llsd.LLSDParseError:
             pass
 
+    def testParseNotationUnterminatedString(self):
+        """
+        Test with an unterminated delimited string
+        """
+        self.assertRaises(llsd.LLSDParseError, self.llsd.parse, b"'foo")
+
 
 class LLSDBinaryUnitTest(unittest.TestCase):
     """
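As a closing usage sketch, the escape handling this PR optimizes can be exercised end to end (assuming the package's top-level parse_notation() entry point):

import llsd

# A notation string containing a named escape (\t) and a hex escape
# (\x41, i.e. 'A'); both paths go through _parse_string_delim.
assert llsd.parse_notation(b"'tab:\\t hex:\\x41'") == 'tab:\t hex:A'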