Skip to content

SL-19314: Recast llsd serialization to write to a stream. #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions llsd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@
BINARY_HEADER, NOTATION_HEADER, XML_HEADER,
LLSDBaseParser, LLSDParseError, LLSDSerializationError,
LongType, UnicodeType, binary, undef, uri)
from llsd.serde_binary import LLSDBinaryParser, format_binary, parse_binary, parse_binary_nohdr
from llsd.serde_notation import LLSDNotationFormatter, LLSDNotationParser, format_notation, parse_notation, parse_notation_nohdr
from llsd.serde_xml import LLSDXMLFormatter, LLSDXMLPrettyFormatter, format_pretty_xml, format_xml, parse_xml, parse_xml_nohdr
from llsd.serde_binary import (LLSDBinaryParser, format_binary, parse_binary, parse_binary_nohdr,
write_binary)
from llsd.serde_notation import (LLSDNotationFormatter, write_notation, format_notation,
LLSDNotationParser, parse_notation, parse_notation_nohdr)
from llsd.serde_xml import (LLSDXMLFormatter, LLSDXMLPrettyFormatter,
write_pretty_xml, write_xml, format_pretty_xml, format_xml,
parse_xml, parse_xml_nohdr)


def parse(something, mime_type = None):
Expand Down
72 changes: 52 additions & 20 deletions llsd/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def __init__(self, thing=None):
undef = _LLSD(None)


# 'binary' exists only so that a Python 2 caller can distinguish binary data
# from str data, since in Python 2 bytes and str are the same type
# ('bytes is str' is True).
if PY2:
class binary(str):
"Simple wrapper for llsd.binary data."
Expand Down Expand Up @@ -174,7 +176,7 @@ def _format_datestr(v):
xml and notation serializations.
"""
if not isinstance(v, datetime.date) and not isinstance(v, datetime.datetime):
raise LLSDParseError("invalid date string %s passed to date formatter" % v)
raise LLSDSerializationError("invalid date string %s passed to date formatter" % v)

if not isinstance(v, datetime.datetime):
v = datetime.datetime.combine(v, datetime.time(0))
Expand Down Expand Up @@ -322,31 +324,61 @@ class LLSDBaseFormatter(object):
role of this base class is to provide self.type_map based on the methods
defined in its subclass.
"""
__slots__ = ['stream', 'type_map']

def __init__(self):
"Construct a new formatter dispatch table."
self.stream = None
self.type_map = {
type(None): self.UNDEF,
undef: self.UNDEF,
bool: self.BOOLEAN,
int: self.INTEGER,
LongType: self.INTEGER,
float: self.REAL,
uuid.UUID: self.UUID,
binary: self.BINARY,
str: self.STRING,
UnicodeType: self.STRING,
newstr: self.STRING,
uri: self.URI,
datetime.datetime: self.DATE,
datetime.date: self.DATE,
list: self.ARRAY,
tuple: self.ARRAY,
types.GeneratorType: self.ARRAY,
dict: self.MAP,
_LLSD: self.LLSD,
type(None): self._UNDEF,
undef: self._UNDEF,
bool: self._BOOLEAN,
int: self._INTEGER,
LongType: self._INTEGER,
float: self._REAL,
uuid.UUID: self._UUID,
binary: self._BINARY,
str: self._STRING,
UnicodeType: self._STRING,
newstr: self._STRING,
uri: self._URI,
datetime.datetime: self._DATE,
datetime.date: self._DATE,
list: self._ARRAY,
tuple: self._ARRAY,
types.GeneratorType: self._ARRAY,
dict: self._MAP,
_LLSD: self._LLSD,
}


def format(self, something):
    """
    Serialize a python object to bytes using the subclass's formatting.

    The object is written through self.write() into an in-memory binary
    buffer, and the buffer's accumulated contents are returned.

    :param something: A python object (typically a dict) to be serialized.
    :returns: A serialized bytes object.
    """
    buffer = io.BytesIO()
    self.write(buffer, something)
    serialized = buffer.getvalue()
    return serialized

def write(self, stream, something):
    """
    Serialize a python object to the passed binary 'stream' using the
    subclass's formatting.

    'stream' is held in self.stream only while self._write() runs; it is
    reset to None afterwards, whether or not serialization raised.

    :param stream: A binary file-like object to which to serialize 'something'.
    :param something: A python object (typically a dict) to be serialized.
    :returns: Whatever the subclass's _write() returns.
    """
    self.stream = stream
    try:
        outcome = self._write(something)
    finally:
        # Detach the caller's stream even when _write() raises.
        self.stream = None
    return outcome


_X_ORD = ord(b'x')
_BACKSLASH_ORD = ord(b'\\')
_DECODE_BUFF_ALLOC_SIZE = 1024
Expand Down
66 changes: 34 additions & 32 deletions llsd/serde_binary.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import calendar
import datetime
import io
import struct
import uuid

Expand Down Expand Up @@ -152,74 +153,75 @@ def format_binary(something):
:param something: a python object (typically a dict) to be serialized.
:returns: Returns a LLSD binary formatted string.
"""
return BINARY_HEADER + b'\n' + _format_binary_recurse(something)
stream = io.BytesIO()
write_binary(stream, something)
return stream.getvalue()


def _format_binary_recurse(something):
"Binary formatter workhorse."
def _format_list(something):
array_builder = []
array_builder.append(b'[' + struct.pack('!i', len(something)))
for item in something:
array_builder.append(_format_binary_recurse(item))
array_builder.append(b']')
return b''.join(array_builder)
def write_binary(stream, something):
    """
    Serialize a python object to the passed binary 'stream' as llsd+binary.

    The binary header line is written first, followed by the serialized
    value itself.

    :param stream: a binary stream open for writing.
    :param something: a python object (typically a dict) to be serialized.
    """
    stream.write(b'<?llsd/binary?>\n')
    _write_binary_recurse(stream, something)


def _write_binary_recurse(stream, something):
    """
    Binary formatter workhorse: write one value, and recursively any values
    it contains, to 'stream' in llsd+binary form.

    :param stream: a binary file-like object; only write() and writelines()
        are called on it.
    :param something: the python object to serialize.
    :raises LLSDSerializationError: if an integer or real cannot be packed,
        or if 'something' is neither a supported type nor convertible to a
        list.
    """
    if something is None:
        stream.write(b'!')
    elif isinstance(something, _LLSD):
        # Unwrap and serialize the wrapped value.
        _write_binary_recurse(stream, something.thing)
    elif isinstance(something, bool):
        # Checked ahead of the integer branch (Python's bool is a subclass
        # of int).
        stream.write(b'1' if something else b'0')
    elif is_integer(something):
        try:
            stream.writelines([b'i', struct.pack('!i', something)])
        except (OverflowError, struct.error) as exc:
            raise LLSDSerializationError(str(exc), something)
    elif isinstance(something, float):
        try:
            stream.writelines([b'r', struct.pack('!d', something)])
        except SystemError as exc:
            raise LLSDSerializationError(str(exc), something)
    elif isinstance(something, uuid.UUID):
        stream.writelines([b'u', something.bytes])
    elif isinstance(something, binary):
        stream.writelines([b'b', struct.pack('!i', len(something)), something])
    elif is_string(something):
        # The length prefix counts encoded bytes, so convert first.
        something = _str_to_bytes(something)
        stream.writelines([b's', struct.pack('!i', len(something)), something])
    elif isinstance(something, uri):
        # NOTE(review): if uri is a string subclass, the is_string() branch
        # above may already capture it -- confirm the intended ordering.
        stream.writelines([b'l', struct.pack('!i', len(something)), something])
    elif isinstance(something, datetime.datetime):
        # True division keeps the fractional second. The previous floor
        # division ('// 1e6') always produced 0.0, because microsecond is
        # below one million, so sub-second precision was silently dropped.
        seconds_since_epoch = (calendar.timegm(something.utctimetuple())
                               + something.microsecond / 1e6)
        # Dates are packed with '<d' while reals above use '!d'; both
        # format strings are kept exactly as before.
        stream.writelines([b'd', struct.pack('<d', seconds_since_epoch)])
    elif isinstance(something, datetime.date):
        # A plain date carries no time of day: timetuple() is midnight.
        seconds_since_epoch = calendar.timegm(something.timetuple())
        stream.writelines([b'd', struct.pack('<d', seconds_since_epoch)])
    elif isinstance(something, (list, tuple)):
        _write_list(stream, something)
    elif isinstance(something, dict):
        stream.writelines([b'{', struct.pack('!i', len(something))])
        for key, value in something.items():
            key = _str_to_bytes(key)
            stream.writelines([b'k', struct.pack('!i', len(key)), key])
            _write_binary_recurse(stream, value)
        stream.write(b'}')
    else:
        # Last resort: treat any other iterable as an array. list() is
        # needed because the array header requires the length up front.
        try:
            return _write_list(stream, list(something))
        except TypeError:
            raise LLSDSerializationError(
                "Cannot serialize unknown type: %s (%s)" %
                (type(something), something))


def _write_list(stream, something):
    """
    Write a sized sequence to 'stream' as an llsd+binary array.

    Emits b'[', the element count packed with '!i', each element in turn
    via _write_binary_recurse(), then the closing b']'.

    :param stream: a binary file-like object open for writing.
    :param something: a sequence supporting len() and iteration.
    """
    packed_count = struct.pack('!i', len(something))
    stream.writelines([b'[', packed_count])
    for element in something:
        _write_binary_recurse(stream, element)
    stream.write(b']')


def parse_binary(something):
"""
This is the basic public interface for parsing llsd+binary.
Expand Down
102 changes: 61 additions & 41 deletions llsd/serde_notation.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def _parse_array(self):
def _parse_uuid(self):
"Parse a uuid."
self._getc() # eat the beginning 'u'
# see comment on LLSDNotationFormatter.UUID() re use of latin-1
# see comment on LLSDNotationFormatter._UUID() re use of latin-1
return uuid.UUID(hex=self._getc(36).decode('latin-1'))

def _parse_uri(self):
Expand Down Expand Up @@ -346,22 +346,17 @@ class LLSDNotationFormatter(LLSDBaseFormatter):

See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
"""
__slots__ = []

def LLSD(self, v):
def _LLSD(self, v):
return self._generate(v.thing)
def UNDEF(self, v):
return b'!'
def BOOLEAN(self, v):
if v:
return b'true'
else:
return b'false'
def INTEGER(self, v):
return B("i%d") % v
def REAL(self, v):
return B("r%r") % v
def UUID(self, v):
def _UNDEF(self, v):
self.stream.write(b'!')
def _BOOLEAN(self, v):
self.stream.write(b'true' if v else b'false')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Naive question, since I'm probably missing a lot of context: before this change, these were public-ish methods that someone might have used to generate fragments of notation format for a value. Did nobody actually use them for that purpose? If anyone did, this change would break every such use. Should these all be renamed with a leading "_" (they're modifying an internal object)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before this change, these were public-ish methods that someone might have used to generate fragments of notation format for a value. Did nobody actually use them for that purpose? If anyone did, this change would break every such use. Should these all be renamed with a leading "_" (they're modifying an internal object)?

I have always assumed that the historical names of these methods were chosen to emphasize the communication between LLSDBaseFormatter and its subclasses. I've never seen any llsd consumer directly reference any LLSDMumbleFormatter class, never mind call its type-specific methods.

That said, yes, those methods are intended solely for internal use by that class hierarchy. Perhaps they should be prefixed with an underscore. If we're going to break hypothetical consumers, better to break them overtly with AttributeError than by quietly returning None.

def _INTEGER(self, v):
self.stream.write(B("i%d") % v)
def _REAL(self, v):
self.stream.write(B("r%r") % v)
def _UUID(self, v):
# latin-1 is the byte-to-byte encoding, mapping \x00-\xFF ->
# \u0000-\u00FF. It's also the fastest encoding, I believe, from
# https://docs.python.org/3/library/codecs.html#encodings-and-unicode
Expand All @@ -370,24 +365,42 @@ def UUID(self, v):
# error behavior in case someone passes an invalid hex string, with
# things other than 0-9a-fA-F, so that they will fail in the UUID
# decode, rather than with a UnicodeError.
return B("u%s") % str(v).encode('latin-1')
def BINARY(self, v):
return b'b64"' + base64.b64encode(v).strip() + b'"'

def STRING(self, v):
return B("'%s'") % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b"'", b"\\'")
def URI(self, v):
return B('l"%s"') % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b'"', b'\\"')
def DATE(self, v):
return B('d"%s"') % _format_datestr(v)
def ARRAY(self, v):
return B("[%s]") % b','.join([self._generate(item) for item in v])
def MAP(self, v):
return B("{%s}") % b','.join([B("'%s':%s") % (_str_to_bytes(UnicodeType(key)).replace(b"\\", b"\\\\").replace(b"'", b"\\'"), self._generate(value))
for key, value in v.items()])
self.stream.writelines([b"u", str(v).encode('latin-1')])
def _BINARY(self, v):
self.stream.writelines([b'b64"', base64.b64encode(v).strip(), b'"'])

def _STRING(self, v):
self.stream.writelines([b"'", self._esc(v), b"'"])
def _URI(self, v):
self.stream.writelines([b'l"', self._esc(v, b'"'), b'"'])
def _DATE(self, v):
self.stream.writelines([b'd"', _format_datestr(v), b'"'])
def _ARRAY(self, v):
    """
    Write the items of iterable 'v' to self.stream as a notation array:
    b'[', the items separated by b',', then b']'.

    Each item is serialized by self._generate().
    """
    self.stream.write(b'[')
    for position, item in enumerate(v):
        # An empty separator is written before the first item, a comma
        # before every later one.
        self.stream.write(b',' if position else b'')
        self._generate(item)
    self.stream.write(b']')
def _MAP(self, v):
    """
    Write mapping 'v' to self.stream as a notation map:
    b'{', comma-separated 'key':value entries, then b'}'.

    Each key is converted with UnicodeType(), escaped by self._esc() and
    wrapped in single quotes; each value is serialized by self._generate().
    """
    self.stream.write(b'{')
    for position, (key, value) in enumerate(v.items()):
        lead = b',' if position else b''
        escaped_key = self._esc(UnicodeType(key))
        self.stream.writelines([lead, b"'", escaped_key, b"':"])
        self._generate(value)
    self.stream.write(b'}')

def _esc(self, data, quote=b"'"):
    """
    Return 'data' as bytes, escaped for use inside a quoted notation token.

    Backslashes are doubled first, then every occurrence of 'quote' is
    prefixed with a backslash.

    :param data: the string to escape; converted with _str_to_bytes().
    :param quote: the quote character (as bytes) delimiting the token.
    """
    raw = _str_to_bytes(data)
    # Backslashes must be doubled before the quote is escaped; otherwise
    # the backslash added in front of each quote would itself be doubled.
    doubled = raw.replace(b"\\", b"\\\\")
    return doubled.replace(quote, b'\\' + quote)

def _generate(self, something):
"Generate notation from a single python object."
"""
Serialize a python object to self.stream as application/llsd+notation

:param something: a python object (typically a dict) to be serialized.
"""
t = type(something)
handler = self.type_map.get(t)
if handler:
Expand All @@ -396,19 +409,13 @@ def _generate(self, something):
return self.type_map[_LLSD](something)
else:
try:
return self.ARRAY(iter(something))
return self._ARRAY(iter(something))
except TypeError:
raise LLSDSerializationError(
"Cannot serialize unknown type: %s (%s)" % (t, something))

def format(self, something):
"""
Format a python object as application/llsd+notation

:param something: a python object (typically a dict) to be serialized.
:returns: Returns a LLSD notation formatted string.
"""
return self._generate(something)
# _write() method is an alias for _generate()
_write = _generate


def format_notation(something):
Expand All @@ -423,6 +430,19 @@ def format_notation(something):
return LLSDNotationFormatter().format(something)


def write_notation(stream, something):
    """
    Write python object 'something' to the binary 'stream', serialized as
    application/llsd+notation.

    :param stream: a binary stream open for writing.
    :param something: a python object (typically a dict) to be serialized.
    :returns: whatever LLSDNotationFormatter.write() returns.

    See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
    """
    formatter = LLSDNotationFormatter()
    return formatter.write(stream, something)


def parse_notation(something):
"""
This is the basic public interface for parsing llsd+notation.
Expand Down
Loading