diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b5f54c96..36b99958 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -219,3 +219,16 @@ jobs: --toolchain-family ${{ matrix.compiler }} \ --endianness ${{ matrix.endianness }} \ ${{ matrix.flag }} + + language-verification-python: + runs-on: ubuntu-latest + needs: test + container: ghcr.io/opencyphal/toxic:tx22.4.1 + steps: + - uses: actions/checkout@v3 + with: + submodules: true + - name: verify + run: | + cd verification/python + nox diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..c70118b6 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,8 @@ +version: 2 +build: + os: ubuntu-22.04 + tools: + python: "3.11" +python: + install: + - requirements: requirements.txt diff --git a/.vscode/settings.json b/.vscode/settings.json index b0ffbcd8..03074030 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -39,6 +39,7 @@ "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools", "files.associations": { "*.py.template": "python", + "nunavut_support.j2": "python", "*.cc": "cpp", "*.hpp": "cpp", "__bit_reference": "cpp", @@ -211,4 +212,55 @@ "/dsdl/i", "/bitspan/" ], + "cSpell.words": [ + "allclose", + "astype", + "autouse", + "bitorder", + "bools", + "builtins", + "Bxxx", + "caplog", + "CDEF", + "codegen", + "Cyphal", + "doctests", + "DSDL", + "dtype", + "EDCB", + "elementwise", + "emptylines", + "endianness", + "errstate", + "fillvalue", + "fpid", + "frombuffer", + "htmlcov", + "itemsize", + "Kirienko", + "maxsplit", + "nbytes", + "ndarray", + "ndim", + "nnvg", + "noxfile", + "opencyphal", + "outdir", + "packbits", + "Pavel", + "postprocessor", + "postprocessors", + "pycyphal", + "pydsdl", + "roadmap", + "Sriram", + "tobytes", + "transcompilation", + "typecheck", + "uavcan", + "unpackbits", + "unseparate", + "unstropped", + "WKCV" + ], } diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 13a4390b..52bc9da3 100644 --- 
a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -188,7 +188,7 @@ We rely on `read the docs`_ to build our documentation from github but we also v as part of our tox build. This means you can view a local copy after completing a full, successful test run (See `Running The Tests`_) or do :code:`docker run --rm -t -v $PWD:/repo ghcr.io/opencyphal/toxic:tx22.4.1 /bin/sh -c "tox -e docs"` to build -the docs target. You can open the index.html under .tox/docs/tmp/index.html or run a local +the docs target. You can open the index.html under ``.tox/docs/tmp/index.html`` or run a local web-server:: python3 -m http.server --directory .tox/docs/tmp & diff --git a/README.rst b/README.rst index 54233c25..d2cfb399 100644 --- a/README.rst +++ b/README.rst @@ -36,13 +36,9 @@ and it can be used to generate code for other languages if custom templates (and Currently, the following languages are supported out of the box: - **C11** (generates header-only libraries) -- **HTML** (generates documentation pages) (experimental support) - -The following languages are currently on the roadmap: - -- **Python** (already supported in `Pycyphal`_, pending - `transplantation into Nunavut `_) -- **C++ 14 and newer** (generates header-only libraries; `work-in-progress `_) +- **C++** (generates header-only libraries; `work-in-progress `_) +- **Python** (generates Python packages) +- **HTML** (generates documentation pages) Nunavut is named after the `Canadian territory`_. We chose the name because it is a beautiful word to say and read. @@ -88,6 +84,17 @@ documentation sections. 
nnvg --experimental-languages --target-language html public_regulated_data_types/reg --lookup-dir public_regulated_data_types/uavcan nnvg --experimental-languages --target-language html public_regulated_data_types/uavcan +Generate Python packages using the command-line tool +---------------------------------------------------- + +This example assumes that the public regulated namespace directories ``reg`` and ``uavcan`` reside under +``public_regulated_data_types/``. +Nunavut is invoked to generate code for the former. + +.. code-block:: shell + + nnvg --target-language py public_regulated_data_types/reg --lookup-dir public_regulated_data_types/uavcan + Use custom templates -------------------- diff --git a/conf.py b/conf.py index c5d34f22..5439a007 100644 --- a/conf.py +++ b/conf.py @@ -28,7 +28,7 @@ # The full version, including alpha/beta/rc tags release = nunavut_version -exclude_patterns = ["**/test"] +exclude_patterns = ["**/test", "**/.nox"] with open(".gitignore", "r") as gif: for line in gif: @@ -67,7 +67,7 @@ source_suffix = ".rst" # The master toctree document. -master_doc = "docs/index" +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/conftest.py b/conftest.py index d208e18f..572538bc 100644 --- a/conftest.py +++ b/conftest.py @@ -397,6 +397,7 @@ def mock_environment(request): # type: ignore "**/CONTRIBUTING.rst", "**/verification/*", "**/prof/*", + "*.j2", "*.png", ], fixtures=[ diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 9bd26fab..00000000 --- a/docs/index.rst +++ /dev/null @@ -1,13 +0,0 @@ - -.. toctree:: - :maxdepth: 2 - :hidden: - - api/library - languages - templates - CLI (nnvg) - dev - Appendix - -.. 
include:: ../README.rst diff --git a/docs/languages.rst b/docs/languages.rst index 9e81d377..7b6ba411 100644 --- a/docs/languages.rst +++ b/docs/languages.rst @@ -39,3 +39,37 @@ nnvg command --experimental-languages \ -I path/to/public_regulated_data_types/uavcan \ /path/to/my_types + +************************* +Python +************************* + +The Python language support generates Python packages that depend on the following packages: + +* **PyDSDL** --- maintained by the OpenCyphal team at https://github.com/OpenCyphal/pydsdl. +* **NumPy** --- a third-party dependency needed for fast serialization of arrays, esp. bit arrays. +* :code:`nunavut_support.py` --- produced by Nunavut itself and stored next to the other generated packages. + When redistributing generated code, this package should be included as well. + +These are the only dependencies of the generated code. Nunavut itself is notably excluded from this list. +The generated code should be compatible with all current versions of Python. +To see the specific versions of Python and dependencies that generated code is tested against, +please refer to ``verification/python`` in the source tree. + +At the moment there are no code generation options for Python; +that is, the generated code is always the same irrespective of the options given. + +The ``nunavut_support.py`` module includes several members that are useful for working with generated code. +The documentation for each member is provided in the docstrings of the module itself; +please be sure to read it. +The most important members are: + +* :code:`serialize`, :code:`deserialize` --- (de)serialize a DSDL object. +* :code:`get_model`, :code:`get_class` --- map a Python class to a PyDSDL AST model and vice versa. +* :code:`get_extent_bytes`, :code:`get_fixed_port_id`, etc. --- get information about a DSDL object. +* :code:`to_builtin`, :code:`update_from_builtin` --- convert a DSDL object to/from a Python dictionary. 
+ This is useful for conversion between DSDL and JSON et al. +* :code:`get_attribute`, :code:`set_attribute` --- get/set object fields. + DSDL fields that are named like Python builtins or keywords are modified with a trailing underscore; + e.g., ``if`` becomes ``if_``. + These helpers allow one to access fields by their DSDL name without having to worry about this. diff --git a/index.rst b/index.rst new file mode 100644 index 00000000..3ba2c46a --- /dev/null +++ b/index.rst @@ -0,0 +1,13 @@ + +.. toctree:: + :maxdepth: 2 + :hidden: + + docs/api/library + docs/languages + docs/templates + CLI (nnvg) + docs/dev + Appendix + +.. include:: README.rst diff --git a/requirements.txt b/requirements.txt index f5b0d184..3212a6ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ # This file provided for readthedocs.io only. Use tox.ini for all dependencies. +. sphinx-argparse sphinxemoji diff --git a/setup.cfg b/setup.cfg index 0f8a0159..bd714ac0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,6 @@ classifiers = License :: OSI Approved :: MIT License Programming Language :: Python Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 @@ -44,7 +43,7 @@ install_requires= zip_safe = False -python_requires = >=3.6 +python_requires = >=3.7 [options.entry_points] console_scripts = diff --git a/src/nunavut/_version.py b/src/nunavut/_version.py index 3110bc3f..514d2929 100644 --- a/src/nunavut/_version.py +++ b/src/nunavut/_version.py @@ -8,7 +8,7 @@ .. autodata:: __version__ """ -__version__ = "2.1.1" +__version__ = "2.2.0" __license__ = "MIT" __author__ = "OpenCyphal" __copyright__ = "Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. Copyright (c) 2023 OpenCyphal." 
diff --git a/src/nunavut/jinja/loaders.py b/src/nunavut/jinja/loaders.py index b9c1aa5b..a6736e0a 100644 --- a/src/nunavut/jinja/loaders.py +++ b/src/nunavut/jinja/loaders.py @@ -199,7 +199,7 @@ def type_to_template(self, value_type: typing.Type) -> typing.Optional[pathlib.P template_name = l.type_to_template(pydsdl.StructureType) assert template_name is not None - assert template_name.name == 'Any.j2' + assert template_name.name == 'StructureType.j2' """ template_path = None diff --git a/src/nunavut/lang/properties.yaml b/src/nunavut/lang/properties.yaml index fdbafe53..5ecec576 100644 --- a/src/nunavut/lang/properties.yaml +++ b/src/nunavut/lang/properties.yaml @@ -332,6 +332,8 @@ nunavut.lang.py: enable_stropping: true encoding_prefix: zX stropping_suffix: _ + limit_empty_lines: 1 + trim_trailing_whitespace: true nunavut.lang.js: extension: .js diff --git a/src/nunavut/lang/py/__init__.py b/src/nunavut/lang/py/__init__.py index 84b6f38d..5b7b9e69 100644 --- a/src/nunavut/lang/py/__init__.py +++ b/src/nunavut/lang/py/__init__.py @@ -7,10 +7,15 @@ Filters for generating python. All filters in this module will be available in the template's global namespace as ``py``. """ +from __future__ import annotations import builtins import functools import keyword -import typing +import base64 +import gzip +import pickle +import itertools +from typing import Any, Iterable import pydsdl @@ -31,7 +36,11 @@ class Language(BaseLanguage): Concrete, Python-specific :class:`nunavut.lang.Language` object. 
""" - PYTHON_RESERVED_IDENTIFIERS = sorted(list(map(str, list(keyword.kwlist) + dir(builtins)))) # type: typing.List[str] + PYTHON_RESERVED_IDENTIFIERS: list[str] = sorted(list(map(str, list(keyword.kwlist) + dir(builtins)))) + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._language_options["enable_serialization_asserts"] = True @functools.lru_cache(maxsize=None) def _get_token_encoder(self) -> TokenEncoder: @@ -40,11 +49,11 @@ def _get_token_encoder(self) -> TokenEncoder: """ return TokenEncoder(self, additional_reserved_identifiers=self.PYTHON_RESERVED_IDENTIFIERS) - def get_includes(self, dep_types: Dependencies) -> typing.List[str]: + def get_includes(self, dep_types: Dependencies) -> list[str]: # imports aren't includes return [] - def filter_id(self, instance: typing.Any, id_type: str = "any") -> str: + def filter_id(self, instance: Any, id_type: str = "any") -> str: raw_name = self.default_filter_id_for_target(instance) return self._get_token_encoder().strop(raw_name, id_type) @@ -108,7 +117,7 @@ def filter_to_template_unique_name(context: SupportsTemplateContext, base_token: @template_language_filter(__name__) -def filter_id(language: Language, instance: typing.Any, id_type: str = "any") -> str: +def filter_id(language: Language, instance: Any, id_type: str = "any") -> str: """ Filter that produces a valid Python identifier for a given object. The encoding may not be reversible. @@ -264,7 +273,7 @@ def filter_short_reference_name(language: Language, t: pydsdl.CompositeType) -> @template_language_list_filter(__name__) -def filter_imports(language: Language, t: pydsdl.CompositeType, sort: bool = True) -> typing.List[str]: +def filter_imports(language: Language, t: pydsdl.CompositeType, sort: bool = True) -> list[str]: """ Returns a list of all modules that must be imported to use a given type. 
@@ -302,7 +311,7 @@ def array_w_composite_type(data_type: pydsdl.Any) -> bool: @template_language_int_filter(__name__) -def filter_longest_id_length(language: Language, attributes: typing.List[pydsdl.Attribute]) -> int: +def filter_longest_id_length(language: Language, attributes: list[pydsdl.Attribute]) -> int: """ Return the length of the longest identifier in a list of :class:`pydsdl.Attribute` objects. @@ -332,3 +341,52 @@ def filter_longest_id_length(language: Language, attributes: typing.List[pydsdl. return max(map(len, map(functools.partial(filter_id, language), attributes))) else: return max(map(len, attributes)) + + +def filter_pickle(x: Any) -> str: + """ + Serializes the given object using pickle and then compresses it using gzip and then encodes it using base85. + """ + pck = base64.b85encode(gzip.compress(pickle.dumps(x, protocol=4))).decode().strip() # type: str + segment_gen = map("".join, itertools.zip_longest(*([iter(pck)] * 100), fillvalue="")) + return "\n".join(repr(x) for x in segment_gen) + + +def filter_numpy_scalar_type(t: pydsdl.Any) -> str: + """ + Returns the numpy scalar type that is the closest match to the given DSDL type. 
+ """ + + def pick_width(w: int) -> int: + for o in [8, 16, 32, 64]: + if w <= o: + return o + raise ValueError(f"Invalid bit width: {w}") # pragma: no cover + + if isinstance(t, pydsdl.BooleanType): + return "_np_.bool_" + if isinstance(t, pydsdl.SignedIntegerType): + return f"_np_.int{pick_width(t.bit_length)}" + if isinstance(t, pydsdl.UnsignedIntegerType): + return f"_np_.uint{pick_width(t.bit_length)}" + if isinstance(t, pydsdl.FloatType): + return f"_np_.float{pick_width(t.bit_length)}" + assert not isinstance(t, pydsdl.PrimitiveType), "Forgot to handle some primitive types" + return "_np_.object_" + + +def filter_newest_minor_version_aliases(tys: Iterable[pydsdl.CompositeType]) -> list[tuple[str, pydsdl.CompositeType]]: + """ + Implementation of https://github.com/OpenCyphal/nunavut/issues/193 + """ + tys = list(tys) + return [ + ( + f"{name}_{major}", + max( + (t for t in tys if t.short_name == name and t.version.major == major), + key=lambda x: int(x.version.minor), + ), + ) + for name, major in sorted({(x.short_name, x.version.major) for x in tys}) + ] diff --git a/src/nunavut/lang/py/support/__init__.py b/src/nunavut/lang/py/support/__init__.py index 8ff6165b..b0078edd 100644 --- a/src/nunavut/lang/py/support/__init__.py +++ b/src/nunavut/lang/py/support/__init__.py @@ -1,19 +1,30 @@ # -# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# Copyright (C) 2018-2020 OpenCyphal Development Team +# Copyright (C) 2023 OpenCyphal Development Team # This software is distributed under the terms of the MIT License. # """ -Empty python package to ensure the support generator doesn't explode. +Contains supporting Python modules to distribute with generated types. +The contained support modules are not part of Nunavut, and one should not attempt to import them, +as they may depend on modules that are not available in the local environment. 
""" import pathlib import typing - -from nunavut._utilities import ResourceType, empty_list_support_files +from nunavut._utilities import ( + ResourceType, + empty_list_support_files, + iter_package_resources, +) __version__ = "1.0.0" -"""Version of the py support headers.""" +"""Version of the Python support module.""" -def list_support_files(resource_type: ResourceType = ResourceType.ANY) -> typing.Generator[pathlib.Path, None, None]: - return empty_list_support_files() +def list_support_files( + resource_type: ResourceType = ResourceType.ANY, +) -> typing.Generator[pathlib.Path, None, None]: + """ + Get a list of Python support modules embedded in this package. + """ + if resource_type not in (ResourceType.ANY, ResourceType.SERIALIZATION_SUPPORT): + return empty_list_support_files() + return iter_package_resources(__name__, ".j2") diff --git a/src/nunavut/lang/py/support/nunavut_support.j2 b/src/nunavut/lang/py/support/nunavut_support.j2 new file mode 100644 index 00000000..1fce3406 --- /dev/null +++ b/src/nunavut/lang/py/support/nunavut_support.j2 @@ -0,0 +1,1814 @@ +# Copyright (C) 2019 OpenCyphal Development Team +# This software is distributed under the terms of the MIT License. +# This code has been migrated from PyCyphal: https://github.com/OpenCyphal/pycyphal +# Author: Pavel Kirienko , Kalyan Sriram + +""" +This module contains various definitions and logic that the code emitted by Nunavut relies on. +It shall be distributed along with the generated code and be importable by it. +No API stability guarantees are made for this module except for those entities that are listed in __all__. +The generated code currently requires the following dependencies to be available at runtime: + +- NumPy +- PyDSDL + +This module also contains built-in unit test functions (named "test_*") and doctests. 
+To execute the tests, simply invoke PyTest on this file as follows:: + + pytest --doctest-modules nunavut_support.py +""" + +from __future__ import annotations +import abc +import sys +from typing import TypeVar, Type, Sequence, cast, Any, Iterable +import importlib +import struct +import string +import base64 +import logging + +# Dependencies not from the standard library: +import numpy # This is not a dependency of Nunavut but is used in the generated code. +from numpy.typing import NDArray +import pydsdl + +if sys.byteorder != "little": # pragma: no cover + raise RuntimeError( + "BIG-ENDIAN PLATFORMS ARE NOT YET SUPPORTED. " + "The current serialization code assumes that the native byte order is little-endian. Since Cyphal uses " + "little-endian byte order in its serialized data representations, this assumption allows us to bypass data " + "transformation in many cases, resulting in zero-cost serialization and deserialization. " + "Big-endian platforms are unable to take advantage of that, requiring byte swapping for multi-byte entities; " + "fortunately, nowadays such platforms are uncommon. If you need to use this code on a big-endian platform, " + "please implement the missing code and submit a pull request upstream, then remove this exception." + ) + +API_VERSION = {{ nunavut.support.version }} +""" +This is the version of the API that the generated code expects to be available at runtime. +""" + +__all__ = [ + "serialize", + "deserialize", + "get_model", + "get_class", + "get_extent_bytes", + "get_fixed_port_id", + "get_attribute", + "set_attribute", + "is_serializable", + "is_message_type", + "is_service_type", + "to_builtin", + "update_from_builtin", +] +""" +No API stability guarantees are made for this module except for those entities that are listed in __all__. +""" + +Byte = numpy.uint8 +""" +We must use uint8 instead of ubyte because uint8 is platform-invariant whereas (u)byte is platform-dependent. 
+""" + +StdPrimitive = TypeVar( + "StdPrimitive", + numpy.float64, + numpy.float32, + numpy.float16, + numpy.uint8, + numpy.uint16, + numpy.uint32, + numpy.uint64, + numpy.int8, + numpy.int16, + numpy.int32, + numpy.int64, +) + +T = TypeVar("T") + +_logger = logging.getLogger(__name__) + +# ================================================== SERIALIZER ================================================== + + +class Serializer(abc.ABC): + """ + All methods operating on scalars implicitly truncate the value if it exceeds the range, + excepting signed integers, for which overflow handling is not implemented (DSDL does not permit truncation + of signed integers anyway so it doesn't matter). Saturation must be implemented externally. + Methods that expect an unsigned integer will raise ValueError if the supplied integer is negative. + """ + + _EXTRA_BUFFER_CAPACITY_BYTES = 1 + """ + We extend the requested buffer size by one because some of the non-byte-aligned write operations + require us to temporarily use one extra byte after the current byte. + """ + + def __init__(self, buffer: NDArray[Byte]): + """ + Do not call this directly. Use :meth:`new` to instantiate. + """ + self._buf = buffer + self._bit_offset = 0 + + @staticmethod + def new(buffer_size_in_bytes: int) -> Serializer: + buffer_size_in_bytes = int(buffer_size_in_bytes) + Serializer._EXTRA_BUFFER_CAPACITY_BYTES + buf: NDArray[Byte] = numpy.zeros(buffer_size_in_bytes, dtype=Byte) + return _PlatformSpecificSerializer(buf) + + @property + def current_bit_length(self) -> int: + return self._bit_offset + + @property + def buffer(self) -> NDArray[Byte]: + """Returns a properly sized read-only slice of the destination buffer zero-bit-padded to byte.""" + out: NDArray[Byte] = self._buf[: (self._bit_offset + 7) // 8] + out.flags.writeable = False + # Here we used to check if out.base is self._buf to make sure we're not creating a copy because that might + # be costly. 
We no longer do that because it doesn't work with forked serializers: forks don't own their + # buffers so this check would be failing; also, with MyPy v1.19 this expression used to segfault the + # interpreter. Very dangerous. + return out + + def skip_bits(self, bit_length: int) -> None: + """This is used for padding bits and for skipping fragments written by forked serializers.""" + self._bit_offset += bit_length + + def pad_to_alignment(self, bit_length: int) -> None: + while self._bit_offset % bit_length != 0: + self.add_unaligned_bit(False) + + def fork_bytes(self, forked_buffer_size_in_bytes: int) -> Serializer: + """ + Creates another serializer that uses the same underlying serialization destination buffer + but offset by :prop:`current_bit_length`. This is intended for delimited serialization. + The algorithm is simple: + + - Fork the main serializer (M) at the point where the delimited nested instance needs to be serialized. + - Having obtained the forked serializer (F), skip the size of the delimited header and serialize the object. + - Take the offset of F (in bytes) sans the size of the delimiter header and serialize the value using M. + - Skip M by the above number of bytes to avoid overwriting the fragment written by F. + - Discard F. The job is done. + + This may be unnecessary if the nested object is of a fixed size. In this case, since its length is known, + the delimiter header can be serialized as a constant, and then the nested object can be serialized trivially + as if it was sealed. + + This method raises a :class:`ValueError` if the forked instance is not byte-aligned or if the requested buffer + size is too large. 
+ """ + if self._bit_offset % 8 != 0: + raise ValueError("Cannot fork unaligned serializer") + forked_buffer = self._buf[self._bit_offset // 8 :] + forked_buffer_size_in_bytes += Serializer._EXTRA_BUFFER_CAPACITY_BYTES + if len(forked_buffer) < forked_buffer_size_in_bytes: + raise ValueError( + f"The required forked buffer size of {forked_buffer_size_in_bytes} bytes is less " + f"than the available remaining buffer space of {len(forked_buffer)} bytes" + ) + forked_buffer = forked_buffer[:forked_buffer_size_in_bytes] + assert len(forked_buffer) == forked_buffer_size_in_bytes + return _PlatformSpecificSerializer(forked_buffer) + + # + # Fast methods optimized for aligned primitive fields. + # The most specialized methods must be used whenever possible for best performance. + # + @abc.abstractmethod + def add_aligned_array_of_standard_bit_length_primitives(self, x: NDArray[StdPrimitive]) -> None: + """ + Accepts an array of ``(u?int|float)(8|16|32|64)`` and encodes it into the destination. + On little-endian platforms this may be implemented virtually through ``memcpy()``. + The current bit offset must be byte-aligned. + """ + raise NotImplementedError + + def add_aligned_array_of_bits(self, x: NDArray[numpy.bool_]) -> None: + """ + Accepts an array of bools and encodes it into the destination using fast native serialization routine + implemented in numpy. The current bit offset must be byte-aligned. 
+ """ + assert self._bit_offset % 8 == 0 + packed = numpy.packbits(x, bitorder="little") + assert len(packed) * 8 >= len(x) + self._buf[self._byte_offset : self._byte_offset + len(packed)] = packed + self._bit_offset += len(x) + + def add_aligned_bytes(self, x: NDArray[Byte]) -> None: + """Simply adds a sequence of bytes; the current bit offset must be byte-aligned.""" + assert self._bit_offset % 8 == 0 + self._buf[self._byte_offset : self._byte_offset + len(x)] = x + self._bit_offset += len(x) * 8 + + def add_aligned_u8(self, x: int) -> None: + assert self._bit_offset % 8 == 0 + self._ensure_not_negative(x) + self._buf[self._byte_offset] = x + self._bit_offset += 8 + + def add_aligned_u16(self, x: int) -> None: + self._ensure_not_negative(x) + self.add_aligned_u8(x & 0xFF) + self.add_aligned_u8((x >> 8) & 0xFF) + + def add_aligned_u32(self, x: int) -> None: + self.add_aligned_u16(x) + self.add_aligned_u16(x >> 16) + + def add_aligned_u64(self, x: int) -> None: + self.add_aligned_u32(x) + self.add_aligned_u32(x >> 32) + + def add_aligned_i8(self, x: int) -> None: + self.add_aligned_u8((256 + x) if x < 0 else x) + + def add_aligned_i16(self, x: int) -> None: + self.add_aligned_u16((65536 + x) if x < 0 else x) + + def add_aligned_i32(self, x: int) -> None: + self.add_aligned_u32((2**32 + x) if x < 0 else x) + + def add_aligned_i64(self, x: int) -> None: + self.add_aligned_u64((2**64 + x) if x < 0 else x) + + def add_aligned_f16(self, x: float) -> None: + self.add_aligned_bytes(self._float_to_bytes("e", x)) + + def add_aligned_f32(self, x: float) -> None: + self.add_aligned_bytes(self._float_to_bytes("f", x)) + + def add_aligned_f64(self, x: float) -> None: + self.add_aligned_bytes(self._float_to_bytes("d", x)) + + # + # Less specialized methods: assuming that the value is aligned at the beginning, but its bit length + # is non-standard and may not be an integer multiple of eight. 
+ # These must not be used if there is a suitable more specialized version defined above. + # + def add_aligned_unsigned(self, value: int, bit_length: int) -> None: + assert self._bit_offset % 8 == 0 + self._ensure_not_negative(value) + bs = self._unsigned_to_bytes(value, bit_length) + self._buf[self._byte_offset : self._byte_offset + len(bs)] = bs + self._bit_offset += bit_length + + def add_aligned_signed(self, value: int, bit_length: int) -> None: + assert bit_length >= 2 + self.add_aligned_unsigned((2**bit_length + value) if value < 0 else value, bit_length) + + # + # Least specialized methods: no assumptions about alignment are made. + # These are the slowest and may be used only if none of the above (specialized) methods are suitable. + # + @abc.abstractmethod + def add_unaligned_array_of_standard_bit_length_primitives(self, x: NDArray[StdPrimitive]) -> None: + """See the aligned counterpart.""" + raise NotImplementedError + + def add_unaligned_array_of_bits(self, x: NDArray[numpy.bool_]) -> None: + packed = numpy.packbits(x, bitorder="little") + backtrack = len(packed) * 8 - len(x) + assert backtrack >= 0 + self.add_unaligned_bytes(packed) + self._bit_offset -= backtrack + + def add_unaligned_bytes(self, value: NDArray[Byte]) -> None: + # This is a faster variant of Ben Dyer's unaligned bit copy algorithm: + # https://github.com/OpenCyphal/libuavcan/blob/fd8ba19bc9c09c05a/libuavcan/src/marshal/uc_bit_array_copy.cpp#L12 + # It is faster because here we are aware that the source is always aligned, which we take advantage of. 
+ left = self._bit_offset % 8 + right = 8 - left + for b in value: + self._buf[self._byte_offset] |= (b << left) & 0xFF + self._bit_offset += 8 + self._buf[self._byte_offset] = b >> right + + def add_unaligned_unsigned(self, value: int, bit_length: int) -> None: + self._ensure_not_negative(value) + bs = self._unsigned_to_bytes(value, bit_length) + backtrack = len(bs) * 8 - bit_length + assert backtrack >= 0 + self.add_unaligned_bytes(bs) + self._bit_offset -= backtrack + + def add_unaligned_signed(self, value: int, bit_length: int) -> None: + assert bit_length >= 2 + self.add_unaligned_unsigned((2**bit_length + value) if value < 0 else value, bit_length) + + def add_unaligned_f16(self, x: float) -> None: + self.add_unaligned_bytes(self._float_to_bytes("e", x)) + + def add_unaligned_f32(self, x: float) -> None: + self.add_unaligned_bytes(self._float_to_bytes("f", x)) + + def add_unaligned_f64(self, x: float) -> None: + self.add_unaligned_bytes(self._float_to_bytes("d", x)) + + def add_unaligned_bit(self, x: bool) -> None: + self._buf[self._byte_offset] |= bool(x) << (self._bit_offset % 8) + self._bit_offset += 1 + + # + # Private methods. + # + @staticmethod + def _unsigned_to_bytes(value: int, bit_length: int) -> NDArray[Byte]: + assert bit_length >= 1 + assert value >= 0, "This operation is undefined for negative integers" + value &= 2**bit_length - 1 + num_bytes = (bit_length + 7) // 8 + out: NDArray[Byte] = numpy.zeros(num_bytes, dtype=Byte) + for i in range(num_bytes): # Oh, why is my life like this? 
+ out[i] = value & 0xFF + value >>= 8 + return out + + @staticmethod + def _float_to_bytes(format_char: str, x: float) -> NDArray[Byte]: + f = "<" + format_char + try: + out = struct.pack(f, x) + except OverflowError: # Oops, let's truncate (saturation must be implemented by the caller if needed) + out = struct.pack(f, numpy.inf if x > 0 else -numpy.inf) + # Note: this operation does not copy the underlying bytes + return numpy.frombuffer(out, dtype=Byte) + + @staticmethod + def _ensure_not_negative(x: int) -> None: + if x < 0: + raise ValueError(f"The requested serialization method is not defined on negative integers ({x})") + + @property + def _byte_offset(self) -> int: + return self._bit_offset // 8 + + def __str__(self) -> str: + s = " ".join(map(_byte_as_bit_string, self.buffer)) + if self._bit_offset % 8 != 0: + s, tail = s.rsplit(maxsplit=1) + bits_to_cut_off = 8 - self._bit_offset % 8 + tail = ("x" * bits_to_cut_off) + tail[bits_to_cut_off:] + return s + " " + tail + return s + + def __repr__(self) -> str: + return f"{type(self).__name__}({self})" + + +class _LittleEndianSerializer(Serializer): + # noinspection PyUnresolvedReferences + def add_aligned_array_of_standard_bit_length_primitives(self, x: NDArray[StdPrimitive]) -> None: + # This is close to direct memcpy() from the source memory into the destination memory, which is very fast. + # We assume that the local platform uses IEEE 754-compliant floating point representation; otherwise, + # the generated serialized representation may be incorrect. NumPy seems to only support IEEE-754 compliant + # platforms though so I don't expect any compatibility issues. + self.add_aligned_bytes(x.view(Byte)) + + def add_unaligned_array_of_standard_bit_length_primitives(self, x: NDArray[StdPrimitive]) -> None: + # This is much slower than the aligned version because we have to manually copy and shift each byte, + # but still better than manual elementwise serialization. 
+ self.add_unaligned_bytes(x.view(Byte)) + + +class _BigEndianSerializer(Serializer): + def add_aligned_array_of_standard_bit_length_primitives(self, x: NDArray[StdPrimitive]) -> None: + raise NotImplementedError("Pull requests are welcome") # pragma: no cover + + def add_unaligned_array_of_standard_bit_length_primitives(self, x: NDArray[StdPrimitive]) -> None: + raise NotImplementedError("Pull requests are welcome") # pragma: no cover + + +_PlatformSpecificSerializer = { + "little": _LittleEndianSerializer, + "big": _BigEndianSerializer, +}[sys.byteorder] + + +def _byte_as_bit_string(x: int) -> str: + return bin(x)[2:].zfill(8) + + +def test_serializer_to_str() -> None: + """This is a unit test, not for production use.""" + ser = Serializer.new(50) + assert str(ser) == "" + ser.add_aligned_u8(0b11001110) + assert str(ser) == "11001110" + ser.add_aligned_i16(-1) + assert str(ser) == "11001110 11111111 11111111" + ser.add_aligned_unsigned(0, 1) + assert str(ser) == "11001110 11111111 11111111 xxxxxxx0" + ser.add_unaligned_signed(-1, 3) + assert str(ser) == "11001110 11111111 11111111 xxxx1110" + + +def test_serializer_aligned() -> None: + """This is a unit test, not for production use.""" + from pytest import raises + + def unseparate(s: Any) -> str: + return str(s).replace(" ", "") + + bs = _byte_as_bit_string + ser = Serializer.new(50) + expected = "" + assert str(ser) == "" + + with raises(ValueError): + ser.add_aligned_u8(-42) + + ser.add_aligned_u8(0b1010_0111) + expected += "1010 0111" + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_i64(0x1234_5678_90AB_CDEF) + expected += bs(0xEF) + bs(0xCD) + bs(0xAB) + bs(0x90) + expected += bs(0x78) + bs(0x56) + bs(0x34) + bs(0x12) + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_i32(-0x1234_5678) # Two's complement: 0xEDCB_A988 + expected += bs(0x88) + bs(0xA9) + bs(0xCB) + bs(0xED) + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_i16(-2) # Two's complement: 
0xfffe + ser.skip_bits(8) + ser.add_aligned_i8(127) + expected += bs(0xFE) + bs(0xFF) + bs(0x00) + bs(0x7F) + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_f64(1) # IEEE 754: 0x3ff0_0000_0000_0000 + expected += bs(0x00) * 6 + bs(0xF0) + bs(0x3F) + ser.add_aligned_f32(1) # IEEE 754: 0x3f80_0000 + expected += bs(0x00) * 2 + bs(0x80) + bs(0x3F) + ser.add_aligned_f16(99999.9) # IEEE 754: overflow, degenerates to +inf: 0x7c00 + expected += bs(0x00) * 1 + bs(0x7C) + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_unsigned(0xBEDA, 12) # 0xBxxx will be truncated away + expected += "1101 1010 xxxx1110" + assert unseparate(ser) == unseparate(expected) + + ser.skip_bits(4) # Bring back into alignment + expected = expected[:-8] + "00001110" + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_unsigned(0xBEDA, 16) # Making sure byte-size-aligned are handled well, too + expected += bs(0xDA) + bs(0xBE) + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_signed(-2, 9) # Two's complement: 510 = 0b1_1111_1110 + expected += "11111110 xxxxxxx1" # MSB is at the end + assert unseparate(ser) == unseparate(expected) + + ser.skip_bits(7) # Bring back into alignment + expected = expected[:-8] + "00000001" + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_array_of_standard_bit_length_primitives(numpy.array([0xDEAD, 0xBEEF], numpy.uint16)) + expected += bs(0xAD) + bs(0xDE) + bs(0xEF) + bs(0xBE) + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_array_of_bits( + numpy.array( + [ + True, + False, + True, + False, + False, + False, + True, + True, # 10100011 + True, + True, + True, + False, + False, + True, + True, + False, # 11100110 + ], + bool, + ) + ) + expected += "11000101 01100111" + assert unseparate(ser) == unseparate(expected) + + ser.add_aligned_array_of_bits( + numpy.array( + [ + True, + False, + True, + False, + False, + False, + True, + True, # 10100011 + True, + True, + 
False, + True, + False, # 11010 + ], + bool, + ) + ) + expected += "11000101 xxx01011" + assert unseparate(ser) == unseparate(expected) + + print("repr(serializer):", repr(ser)) + + with raises(ValueError, match=".*read-only.*"): + ser.buffer[0] = 123 # The buffer is read-only for safety reasons + + +def test_serializer_unaligned() -> None: # Tricky cases with unaligned fields (very tricky) + """This is a unit test, not for production use.""" + ser = Serializer.new(40) + + ser.add_unaligned_array_of_bits( + numpy.array( + [ + True, + False, + True, + False, + False, + False, + True, + True, # 10100011 + True, + True, + True, # 111 + ], + bool, + ) + ) + assert str(ser) == "11000101 xxxxx111" + + ser.add_unaligned_array_of_bits( + numpy.array( + [ + True, + False, + True, + False, + False, # ???10100 (byte alignment restored here) + True, + True, + True, + False, + True, # 11101 (byte alignment lost, three bits short) + ], + bool, + ) + ) + assert str(ser) == "11000101 00101111 xxx10111" + + # Adding '00010010 00110100 01010110' + ser.add_unaligned_bytes(numpy.array([0x12, 0x34, 0x56], dtype=Byte)) + assert str(ser) == "11000101 00101111 01010111 10000010 11000110 xxx01010" + + ser.add_unaligned_array_of_bits(numpy.array([False, True, True], bool)) + assert ser._bit_offset % 8 == 0, "Byte alignment is not restored" # pylint: disable=protected-access + assert str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010" + + ser.add_unaligned_bytes(numpy.array([0x12, 0x34, 0x56], dtype=Byte)) # We're actually aligned here + assert str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110" + + ser.add_unaligned_bit(True) + ser.add_unaligned_bit(False) + ser.add_unaligned_bit(False) + ser.add_unaligned_bit(True) + ser.add_unaligned_bit(True) # Three bits short until alignment + assert str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 xxx11001" + + ser.add_unaligned_signed(-2, 8) # 
Two's complement: 254 = 1111 1110 + assert ( + str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 11011001 " + "xxx11111" + ) + + ser.add_unaligned_unsigned(0b11101100101, 11) # Tricky, eh? Eleven bits, unaligned write + assert ser._bit_offset % 8 == 0, "Byte alignment is not restored" # pylint: disable=protected-access + assert ( + str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 11011001 " + "10111111 11101100" + ) + + ser.add_unaligned_unsigned(0b1110, 3) # MSB truncated away + assert ( + str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 11011001 " + "10111111 11101100 xxxxx110" + ) + + # Adding '00000000 00000000 00000000 00000000 00000000 00000000 11110000 00111111' + ser.add_unaligned_f64(1) + assert ( + str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 11011001 " + "10111111 11101100 00000110 00000000 00000000 00000000 00000000 00000000 10000000 11111111 " + "xxxxx001" + ) + + # Adding '00000000 00000000 10000000 00111111' + ser.add_unaligned_f32(1) + assert ( + str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 11011001 " + "10111111 11101100 00000110 00000000 00000000 00000000 00000000 00000000 10000000 11111111 " + "00000001 00000000 00000000 11111100 xxxxx001" + ) + + # Adding '00000000 11111100' + ser.add_unaligned_f16(-99999.9) + assert ( + str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 11011001 " + "10111111 11101100 00000110 00000000 00000000 00000000 00000000 00000000 10000000 11111111 " + "00000001 00000000 00000000 11111100 00000001 11100000 xxxxx111" + ) + + # Adding '10101101 11011110 11101111 10111110' + ser.add_unaligned_array_of_standard_bit_length_primitives(numpy.array([0xDEAD, 0xBEEF], numpy.uint16)) + assert ( + str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 
00010010 00110100 01010110 11011001 " + "10111111 11101100 00000110 00000000 00000000 00000000 00000000 00000000 10000000 11111111 " + "00000001 00000000 00000000 11111100 00000001 11100000 01101111 11110101 01111110 11110111 " + "xxxxx101" + ) + + ser.skip_bits(5) + assert ser._bit_offset % 8 == 0, "Byte alignment is not restored" # pylint: disable=protected-access + assert ( + str(ser) == "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 11011001 " + "10111111 11101100 00000110 00000000 00000000 00000000 00000000 00000000 10000000 11111111 " + "00000001 00000000 00000000 11111100 00000001 11100000 01101111 11110101 01111110 11110111 " + "00000101" + ) + + print("repr(serializer):", repr(ser)) + + +def test_serializer_fork_bytes() -> None: + """This is a unit test, not for production use.""" + import pytest + + r = Serializer.new(16) + m = Serializer.new(16) + assert str(r) == str(m) + + r.add_aligned_u8(123) + m.add_aligned_u8(123) + assert str(r) == str(m) + + with pytest.raises(ValueError): + m.fork_bytes(16) # Out of range + + f = m.fork_bytes(15) + assert str(f) == "" + r.add_aligned_u8(42) + f.add_aligned_u8(42) + assert str(r) != str(m) + m.skip_bits(8) + assert str(r) == str(m) # M updated even though we didn't write in it! + + r.add_aligned_u8(11) + m.add_aligned_u8(11) + assert str(r) == str(m) + + f.skip_bits(8) + ff = f.fork_bytes(1) + r.add_aligned_u8(22) + ff.add_aligned_u8(22) + assert str(r) != str(m) + m.skip_bits(8) + assert str(r) == str(m) # M updated even though we didn't write in it! Double indirection. + + ff.add_unaligned_bit(True) # Break alignment + with pytest.raises(ValueError): + ff.fork_bytes(1) # Bad alignment + + +# ================================================== DESERIALIZER ================================================== + + +class Deserializer(abc.ABC): + """ + The deserializer class is used for deconstruction of serialized representations of DSDL objects into Python objects. 
+ It implements the implicit zero extension rule as described in the Specification. + """ + + class FormatError(ValueError): + """ + This exception class is used when an auto-generated deserialization routine is supplied with invalid input data; + in other words, input that is not a valid serialized representation of its data type. + + Deserialization logic (auto-generated or manually written) may use this exception type. + When thrown from a deserialization method, it may be intercepted by its caller, + which then returns None instead of a valid deserialized instance, + indicating that the serialized representation is invalid. + """ + + def __init__(self, fragmented_buffer: Sequence[memoryview]): + """ + Do not call this directly. Use :meth:`new` to instantiate. + """ + self._buf = ZeroExtendingBuffer(fragmented_buffer) + self._bit_offset = 0 + assert self.consumed_bit_length + self.remaining_bit_length == self._buf.bit_length + + @staticmethod + def new(fragmented_buffer: Sequence[memoryview]) -> Deserializer: + """ + :param fragmented_buffer: The source serialized representation. The deserializer will attempt to avoid copying + any data from the serialized representation, establishing direct references to its memory instead. + If any of the source buffer fragments are read-only, some of the deserialized array-typed values + may end up being read-only as well. If that is undesirable, use writeable buffer. + + :return: A new instance of Deserializer, either little-endian or big-endian, depending on the platform. 
+ """ + return _PlatformSpecificDeserializer(fragmented_buffer) + + @property + def consumed_bit_length(self) -> int: + return self._bit_offset + + @property + def remaining_bit_length(self) -> int: + """Returns negative if out of bounds (zero extension rule in effect).""" + return self._buf.bit_length - self._bit_offset + + def skip_bits(self, bit_length: int) -> None: + """This is used for padding bits.""" + _ensure_cardinal(bit_length) + self._bit_offset += bit_length + + def pad_to_alignment(self, bit_length: int) -> None: + while self._bit_offset % bit_length != 0: + self._bit_offset += 1 + + def fork_bytes(self, forked_buffer_size_in_bytes: int) -> Deserializer: + """ + This is the counterpart of fork_bytes() defined in the serializer intended for deserializing delimited types. + Forking is necessary to support implicit truncation and implicit zero extension of nested objects. + The algorithm is as follows: + + - Before forking, using the main deserializer (M), read the delimiter header. + - If the value of the delimiter header exceeds the number of bytes remaining in the deserialization buffer, + raise :class:`FormatError`, thereby declaring the serialized representation invalid, as prescribed by the + Specification. + - Fork M. + - Skip M by the size reported by the delimiter header. + - Using the forked deserializer (F), deserialize the nested object. F will apply implicit truncation + and the implicit zero extension rules as necessary regardless of the amount of data remaining in M. + - Discard F. + + This method raises a :class:`ValueError` if the forked instance is not byte-aligned or if the requested buffer + size is too large. The latter is because it is a class usage error, not a deserialization error. 
+ """ + if self._bit_offset % 8 != 0: + raise ValueError("Cannot fork unaligned deserializer") + remaining_bit_length = self.remaining_bit_length + assert remaining_bit_length % 8 == 0 + remaining_byte_length = remaining_bit_length // 8 + if remaining_byte_length < forked_buffer_size_in_bytes: + raise ValueError( + f"Invalid usage: the required forked buffer size of {forked_buffer_size_in_bytes} bytes " + f"is less than the available remaining buffer space of {remaining_byte_length} bytes" + ) + out = _PlatformSpecificDeserializer(self._buf.fork_bytes(self._byte_offset, forked_buffer_size_in_bytes)) + assert out.remaining_bit_length == forked_buffer_size_in_bytes * 8 + return out + + # + # Fast methods optimized for aligned primitive fields. + # The most specialized methods must be used whenever possible for best performance. + # + @abc.abstractmethod + def fetch_aligned_array_of_standard_bit_length_primitives( + self, dtype: Type[StdPrimitive], count: int + ) -> NDArray[StdPrimitive]: + """ + Returns a new array which may directly refer to the underlying memory. + The returned array may be read-only if the source buffer is read-only. + """ + raise NotImplementedError + + def fetch_aligned_array_of_bits(self, count: int) -> NDArray[numpy.bool_]: + """ + Quickly decodes an aligned array of bits using the numpy's fast bit unpacking routine. + A new array is always created (the memory cannot be shared with the buffer due to the layout transformation). + The returned array is of dtype :class:`bool`. 
+ """ + _ensure_cardinal(count) + assert self._bit_offset % 8 == 0 + bs = self._buf.get_unsigned_slice(self._byte_offset, self._byte_offset + (count + 7) // 8) + out = numpy.unpackbits(bs, bitorder="little")[:count] + self._bit_offset += count + assert len(out) == count + return cast(NDArray[numpy.bool_], out.astype(dtype=bool)) + + def fetch_aligned_bytes(self, count: int) -> NDArray[Byte]: + _ensure_cardinal(count) + assert self._bit_offset % 8 == 0 + out = self._buf.get_unsigned_slice(self._byte_offset, self._byte_offset + count) + self._bit_offset += count * 8 + assert len(out) == count + return out + + def fetch_aligned_u8(self) -> int: + assert self._bit_offset % 8 == 0 + out = self._buf.get_byte(self._byte_offset) + assert isinstance(out, int) # Make sure it's not a NumPy's integer type like numpy.uint8. We need native int. + self._bit_offset += 8 + return out + + def fetch_aligned_u16( + self, + ) -> int: # TODO: here and below, consider using int.from_bytes()? + out = self.fetch_aligned_u8() + out |= self.fetch_aligned_u8() << 8 + return out + + def fetch_aligned_u32(self) -> int: + out = self.fetch_aligned_u16() + out |= self.fetch_aligned_u16() << 16 + return out + + def fetch_aligned_u64(self) -> int: + out = self.fetch_aligned_u32() + out |= self.fetch_aligned_u32() << 32 + return out + + def fetch_aligned_i8(self) -> int: + x = self.fetch_aligned_u8() + return (x - 256) if x >= 128 else x + + def fetch_aligned_i16(self) -> int: + x = self.fetch_aligned_u16() + return (x - 65536) if x >= 32768 else x + + def fetch_aligned_i32(self) -> int: + x = self.fetch_aligned_u32() + return int(x - 2**32) if x >= 2**31 else x # wrapped in int() to appease MyPy + + def fetch_aligned_i64(self) -> int: + x = self.fetch_aligned_u64() + return int(x - 2**64) if x >= 2**63 else x # wrapped in int() to appease MyPy + + def fetch_aligned_f16(self) -> float: # noinspection PyTypeChecker + (out,) = struct.unpack(" float: # noinspection PyTypeChecker + (out,) = 
struct.unpack(" float: # noinspection PyTypeChecker + (out,) = struct.unpack(" int: + _ensure_cardinal(bit_length) + assert self._bit_offset % 8 == 0 + bs = self._buf.get_unsigned_slice(self._byte_offset, self._byte_offset + (bit_length + 7) // 8) + self._bit_offset += bit_length + return self._unsigned_from_bytes(bs, bit_length) + + def fetch_aligned_signed(self, bit_length: int) -> int: + assert bit_length >= 2 + u = self.fetch_aligned_unsigned(bit_length) + out = (u - 2**bit_length) if u >= 2 ** (bit_length - 1) else u + assert isinstance(out, int) # MyPy pls + return out + + # + # Least specialized methods: no assumptions about alignment are made. + # These are the slowest and may be used only if none of the above (specialized) methods are suitable. + # + @abc.abstractmethod + def fetch_unaligned_array_of_standard_bit_length_primitives( + self, dtype: Type[StdPrimitive], count: int + ) -> NDArray[StdPrimitive]: + """See the aligned counterpart.""" + raise NotImplementedError + + def fetch_unaligned_array_of_bits(self, count: int) -> NDArray[numpy.bool_]: + _ensure_cardinal(count) + byte_count = (count + 7) // 8 + bs = self.fetch_unaligned_bytes(byte_count) + assert len(bs) == byte_count + backtrack = byte_count * 8 - count + assert 0 <= backtrack < 8 + self._bit_offset -= backtrack + out: NDArray[numpy.bool_] = numpy.unpackbits(bs, bitorder="little")[:count].astype(dtype=bool) + assert len(out) == count + return out + + def fetch_unaligned_bytes(self, count: int) -> NDArray[Byte]: + if count > 0: + if self._bit_offset % 8 != 0: + # This is a faster variant of Ben Dyer's unaligned bit copy algorithm: + # https://github.com/OpenCyphal/libuavcan/blob/fd8ba19bc9c09/libuavcan/src/marshal/uc_bit_array_copy.cpp#L12 + # It is faster because here we are aware that the destination is always aligned, which we take + # advantage of. 
This algorithm breaks for byte-aligned offset, so we have to delegate the aligned + # case to the aligned copy method (which is also much faster). + out: NDArray[Byte] = numpy.empty(count, dtype=Byte) + right = self._bit_offset % 8 + left = 8 - right + assert (1 <= right <= 7) and (1 <= left <= 7) + # The last byte is a special case because if we're reading the last few unaligned bits, the very last + # byte access will be always out of range. We don't care because of the implicit zero extension rule. + for i in range(count): + byte_offset = self._byte_offset + out[i] = (self._buf.get_byte(byte_offset) >> right) | ( + (self._buf.get_byte(byte_offset + 1) << left) & 0xFF + ) + self._bit_offset += 8 + assert len(out) == count + return out + return self.fetch_aligned_bytes(count) + return numpy.zeros(0, dtype=Byte) + + def fetch_unaligned_unsigned(self, bit_length: int) -> int: + _ensure_cardinal(bit_length) + byte_length = (bit_length + 7) // 8 + bs = self.fetch_unaligned_bytes(byte_length) + assert len(bs) == byte_length + backtrack = byte_length * 8 - bit_length + assert 0 <= backtrack < 8 + self._bit_offset -= backtrack + return self._unsigned_from_bytes(bs, bit_length) + + def fetch_unaligned_signed(self, bit_length: int) -> int: + assert bit_length >= 2 + u = self.fetch_unaligned_unsigned(bit_length) + out = (u - 2**bit_length) if u >= 2 ** (bit_length - 1) else u + assert isinstance(out, int) # MyPy pls + return out + + def fetch_unaligned_f16(self) -> float: # noinspection PyTypeChecker + (out,) = struct.unpack(" float: # noinspection PyTypeChecker + (out,) = struct.unpack(" float: # noinspection PyTypeChecker + (out,) = struct.unpack(" bool: + mask = 1 << (self._bit_offset % 8) + assert 1 <= mask <= 128 + out = self._buf.get_byte(self._byte_offset) & mask == mask + self._bit_offset += 1 + return bool(out) + + # + # Private methods. 
+ # + @staticmethod + def _unsigned_from_bytes(x: NDArray[Byte], bit_length: int) -> int: + assert bit_length >= 1 + num_bytes = (bit_length + 7) // 8 + assert num_bytes > 0 + last_byte_index = num_bytes - 1 + assert len(x) >= num_bytes + out = 0 + for i in range(last_byte_index): + out |= int(x[i]) << (i * 8) + msb_mask = (2 ** (bit_length % 8) - 1) if bit_length % 8 != 0 else 0xFF + assert msb_mask in (1, 3, 7, 15, 31, 63, 127, 255) + out |= (int(x[last_byte_index]) & msb_mask) << (last_byte_index * 8) + assert 0 <= out < (2**bit_length) + return out + + @property + def _byte_offset(self) -> int: + return self._bit_offset // 8 + + def __repr__(self) -> str: + return ( + f"{type(self).__name__}(" + f"consumed_bit_length={self.consumed_bit_length}, " + f"remaining_bit_length={self.remaining_bit_length}, " + f"serialized_representation_base64={self._buf.to_base64()!r})" + ) + + +class _LittleEndianDeserializer(Deserializer): + def fetch_aligned_array_of_standard_bit_length_primitives( + self, dtype: Type[StdPrimitive], count: int + ) -> NDArray[StdPrimitive]: + assert dtype not in (bool, numpy.bool_, object), "Invalid usage" + assert self._bit_offset % 8 == 0 + bo = self._byte_offset + # Interestingly, numpy doesn't care about alignment. If the source buffer is not properly aligned, it will + # work anyway but slower. 
+ out: NDArray[StdPrimitive] = numpy.frombuffer( + self._buf.get_unsigned_slice(bo, bo + count * numpy.dtype(dtype).itemsize), + dtype=dtype, + ) + assert len(out) == count + self._bit_offset += out.nbytes * 8 + return out + + def fetch_unaligned_array_of_standard_bit_length_primitives( + self, dtype: Type[StdPrimitive], count: int + ) -> NDArray[StdPrimitive]: + assert dtype not in (bool, numpy.bool_, object), "Invalid usage" + bs = self.fetch_unaligned_bytes(numpy.dtype(dtype).itemsize * count) + assert len(bs) >= count + return numpy.frombuffer(bs, dtype=dtype, count=count) + + +class _BigEndianDeserializer(Deserializer): + def fetch_aligned_array_of_standard_bit_length_primitives( + self, dtype: Type[StdPrimitive], count: int + ) -> NDArray[StdPrimitive]: + raise NotImplementedError("Pull requests are welcome") + + def fetch_unaligned_array_of_standard_bit_length_primitives( + self, dtype: Type[StdPrimitive], count: int + ) -> NDArray[StdPrimitive]: + raise NotImplementedError("Pull requests are welcome") + + +_PlatformSpecificDeserializer = { + "little": _LittleEndianDeserializer, + "big": _BigEndianDeserializer, +}[sys.byteorder] + + +class ZeroExtendingBuffer: + """ + This class implements the implicit zero extension logic as described in the Specification. + A read beyond the end of the buffer returns zero bytes. + """ + + def __init__(self, fragmented_buffer: Sequence[memoryview]): + # TODO: Concatenation is a tentative measure. Add proper support for fragmented buffers for speed. + if len(fragmented_buffer) == 1: + contiguous: bytearray | memoryview = fragmented_buffer[0] # Fast path. 
+ else: + contiguous = bytearray().join(fragmented_buffer) + + self._buf: NDArray[Byte] = numpy.frombuffer(contiguous, dtype=Byte) + assert self._buf.dtype == Byte and self._buf.ndim == 1 + + @property + def bit_length(self) -> int: + return len(self._buf) * 8 + + def get_byte(self, index: int) -> int: + """ + Like the standard ``x[i]`` except that i may not be negative and out of range access returns zero. + """ + if index < 0: + raise ValueError("Byte index may not be negative because the end of a zero-extended buffer is undefined.") + try: + return int(self._buf[index]) + except IndexError: + return 0 # Implicit zero extension rule + + def get_unsigned_slice(self, left: int, right: int) -> NDArray[Byte]: + """ + Like the standard ``x[left:right]`` except that neither index may be negative, + left may not exceed right (otherwise it's a :class:`ValueError`), + and the returned value is always of size ``right-left`` right-zero-padded if necessary. + """ + if not (0 <= left <= right): + raise ValueError(f"Invalid slice boundary specification: [{left}:{right}]") + count = int(right - left) + assert count >= 0 + out: NDArray[Byte] = self._buf[left:right] # Slicing never raises an IndexError. + if len(out) < count: # Implicit zero extension rule + out = numpy.concatenate((out, numpy.zeros(count - len(out), dtype=Byte))) + assert len(out) == count + return out + + def fork_bytes(self, offset_bytes: int, length_bytes: int) -> Sequence[memoryview]: + """ + This is intended for use with :meth:`Deserializer.fork_bytes`. + Given an offset from the beginning and length (both in bytes), yields a list of compliant memory fragments + that can be fed into the forked deserializer instance. + The requested (offset + length) shall not exceeded the buffer length; this is because per the Specification, + a delimiter header cannot exceed the amount of remaining space in the deserialization buffer. 
+ """ + # Currently, we use a contiguous buffer, but when scattered buffers are supported, this method will need + # to discard the fragments before the requested offset and then return the following subset of fragments. + if offset_bytes + length_bytes > len(self._buf): + raise ValueError(f"Invalid fork: offset ({offset_bytes}) + length ({length_bytes}) > {len(self._buf)}") + out = memoryview(self._buf[offset_bytes : offset_bytes + length_bytes]) # type: ignore + assert len(out) == length_bytes + return [out] + + def to_base64(self) -> str: + return base64.b64encode(self._buf.tobytes()).decode() + + +def _ensure_cardinal(i: int) -> None: + if i < 0: + raise ValueError(f"Cardinal may not be negative: {i}") + + +def test_deserializer_aligned() -> None: + """This is a unit test, not for production use.""" + from pytest import raises, approx + + # The buffer is constructed from the corresponding serialization test. + # The final bit padding is done with 1's to ensure that they are correctly discarded. 
+ sample = bytes( + map( + lambda x: int(x, 2), + "10100111 11101111 11001101 10101011 10010000 01111000 01010110 00110100 00010010 10001000 10101001 " + "11001011 11101101 11111110 11111111 00000000 01111111 00000000 00000000 00000000 00000000 00000000 " + "00000000 11110000 00111111 00000000 00000000 10000000 00111111 00000000 01111100 11011010 00001110 " + "11011010 10111110 11111110 00000001 10101101 11011110 11101111 10111110 11000101 01100111 11000101 " + "11101011".split(), + ) + ) + assert len(sample) == 45 + + des = Deserializer.new([memoryview(sample)]) + assert des.remaining_bit_length == 45 * 8 + + assert des.fetch_aligned_u8() == 0b1010_0111 + assert des.fetch_aligned_i64() == 0x1234_5678_90AB_CDEF + assert des.fetch_aligned_i32() == -0x1234_5678 + assert des.fetch_aligned_i16() == -2 + + assert des.remaining_bit_length == 45 * 8 - 8 - 64 - 32 - 16 + des.skip_bits(8) + assert des.remaining_bit_length == 45 * 8 - 8 - 64 - 32 - 16 - 8 + + assert des.fetch_aligned_i8() == 127 + assert des.fetch_aligned_f64() == approx(1.0) + assert des.fetch_aligned_f32() == approx(1.0) + assert des.fetch_aligned_f16() == numpy.inf + + assert des.fetch_aligned_unsigned(12) == 0xEDA + des.skip_bits(4) + assert des.fetch_aligned_unsigned(16) == 0xBEDA + assert des.fetch_aligned_signed(9) == -2 + des.skip_bits(7) + + assert all(des.fetch_aligned_array_of_standard_bit_length_primitives(numpy.uint16, 2) == [0xDEAD, 0xBEEF]) + + assert all( + des.fetch_aligned_array_of_bits(16) + == [ + True, + False, + True, + False, + False, + False, + True, + True, + True, + True, + True, + False, + False, + True, + True, + False, + ] + ) + + assert all( + des.fetch_aligned_array_of_bits(13) + == [ + True, + False, + True, + False, + False, + False, + True, + True, + True, + True, + False, + True, + False, + ] + ) + + print("repr(deserializer):", repr(des)) + + des = Deserializer.new([memoryview(bytes([1, 2, 3]))]) + + assert list(des.fetch_aligned_array_of_bits(0)) == [] + assert 
list(des.fetch_aligned_bytes(0)) == [] + assert des.remaining_bit_length == 3 * 8 + + with raises(ValueError): + des.fetch_aligned_array_of_bits(-1) + + with raises(ValueError): + des.fetch_aligned_bytes(-1) + + des.skip_bits(3 * 8) + assert des.remaining_bit_length == 0 + + assert all([False] * 100 == des.fetch_aligned_array_of_bits(100)) # type: ignore + assert des.remaining_bit_length == -100 + des.skip_bits(4) + assert des.remaining_bit_length == -104 + assert b"\x00" * 10 == des.fetch_aligned_bytes(10).tobytes() + assert des.remaining_bit_length == -184 + des.skip_bits(64) + assert des.remaining_bit_length == -248 + assert 0 == des.fetch_aligned_unsigned(64) + assert des.remaining_bit_length == -312 + + print("repr(deserializer):", repr(des)) + + +def test_deserializer_unaligned() -> None: + """This is a unit test, not for production use.""" + from pytest import approx + + des = Deserializer.new([memoryview(bytearray([0b10101010, 0b01011101, 0b11001100, 0b10010001]))]) + assert des.consumed_bit_length == 0 + assert des.consumed_bit_length % 8 == 0 + assert list(des.fetch_aligned_array_of_bits(3)) == [False, True, False] + assert des.consumed_bit_length == 3 + assert des.consumed_bit_length % 8 == 3 + assert list(des.fetch_unaligned_bytes(0)) == [] + assert list(des.fetch_unaligned_bytes(2)) == [0b10110101, 0b10001011] + assert list(des.fetch_unaligned_bytes(1)) == [0b00111001] + assert des.consumed_bit_length == 27 + assert des.consumed_bit_length % 8 == 3 + assert des.remaining_bit_length == 5 + assert all(numpy.array([0b00010010, 0], dtype=Byte) == des.fetch_unaligned_bytes(2)) + assert des.consumed_bit_length == 43 + assert des.remaining_bit_length == -11 + + des = Deserializer.new([memoryview(bytearray([0b10101010, 0b01011101, 0b11001100, 0b10010001]))]) + assert list(des.fetch_unaligned_bytes(0)) == [] + assert list(des.fetch_unaligned_bytes(2)) == [ + 0b10101010, + 0b01011101, + ] # Actually aligned + assert list(des.fetch_unaligned_bytes(1)) == 
[0b11001100] + assert des.remaining_bit_length == 8 + assert list(des.fetch_unaligned_bytes(2)) == [0b10010001, 0] + assert des.remaining_bit_length == -8 + + # The buffer is constructed from the corresponding serialization test. + sample = bytearray( + map( + lambda x: int(x, 2), + "11000101 00101111 01010111 10000010 11000110 11001010 00010010 00110100 01010110 11011001 10111111 " + "11101100 00000110 00000000 00000000 00000000 00000000 00000000 10000000 11111111 00000001 00000000 " + "00000000 11111100 00000001 11100000 01101111 11110101 01111110 11110111 00000101".split(), + ) + ) + assert len(sample) == 31 + + des = Deserializer.new([memoryview(sample[:])]) + assert des.remaining_bit_length == 31 * 8 + + assert list(des.fetch_unaligned_array_of_bits(11)) == [ + True, + False, + True, + False, + False, + False, + True, + True, # 10100011 + True, + True, + True, # 111 + ] + assert list(des.fetch_unaligned_array_of_bits(10)) == [ + True, + False, + True, + False, + False, # ???10100 (byte alignment restored here) + True, + True, + True, + False, + True, # 11101 (byte alignment lost, three bits short) + ] + + assert list(des.fetch_unaligned_bytes(3)) == [0x12, 0x34, 0x56] + assert list(des.fetch_unaligned_array_of_bits(3)) == [False, True, True] + assert list(des.fetch_unaligned_bytes(3)) == [0x12, 0x34, 0x56] + + assert des.fetch_unaligned_bit() + assert not des.fetch_unaligned_bit() + assert not des.fetch_unaligned_bit() + assert des.fetch_unaligned_bit() + assert des.fetch_unaligned_bit() + + assert des.fetch_unaligned_signed(8) == -2 + assert des.fetch_unaligned_unsigned(11) == 0b111_0110_0101 + assert des.fetch_unaligned_unsigned(3) == 0b110 + + assert des.consumed_bit_length % 8 > 0 # not aligned + assert des.fetch_unaligned_f64() == approx(1.0) + assert des.fetch_unaligned_f32() == approx(1.0) + assert des.fetch_unaligned_f16() == -numpy.inf + + assert list(des.fetch_unaligned_array_of_standard_bit_length_primitives(numpy.uint16, 2)) == [0xDEAD, 0xBEEF] + 
des.skip_bits(5) + assert des.consumed_bit_length % 8 == 0 + assert des.remaining_bit_length == 0 + + print("repr(deserializer):", repr(des)) + + +def test_deserializer_fork_bytes() -> None: + """This is a unit test, not for production use.""" + import pytest + + m = Deserializer.new( + [ + memoryview( + bytes( + [ + 0b10100111, + 0b11101111, + 0b11001101, + 0b10101011, + 0b10010000, + 0b01111000, + 0b01010110, + 0b00110100, + ] + ) + ) + ] + ) + with pytest.raises(ValueError): + m.fork_bytes(9) + + f = m.fork_bytes(8) + assert f.consumed_bit_length == 0 + assert f.remaining_bit_length == 8 * 8 + assert f.fetch_aligned_u8() == 0b10100111 + assert f.remaining_bit_length == 7 * 8 + assert f.fetch_aligned_u8() == 0b11101111 + assert f.remaining_bit_length == 6 * 8 + assert f.consumed_bit_length == 16 + + assert m.remaining_bit_length == 8 * 8 + m.skip_bits(6 * 8) + assert m.remaining_bit_length == 2 * 8 + assert m.fetch_aligned_u8() == 0b01010110 + assert m.fetch_aligned_u8() == 0b00110100 + assert m.remaining_bit_length == 0 + assert m.fetch_aligned_u8() == 0 + assert m.fetch_aligned_u16() == 0 + assert m.fetch_aligned_u32() == 0 + assert m.fetch_aligned_u64() == 0 + + assert f.remaining_bit_length == 6 * 8 + ff = f.fork_bytes(2) + assert ff.consumed_bit_length == 0 + assert ff.remaining_bit_length == 16 + assert ff.fetch_aligned_u8() == 0b11001101 + assert ff.fetch_aligned_u8() == 0b10101011 + assert ff.remaining_bit_length == 0 + assert ff.consumed_bit_length == 16 + assert ff.fetch_aligned_u8() == 0 + assert ff.fetch_aligned_u16() == 0 + assert ff.fetch_aligned_u32() == 0 + assert ff.fetch_aligned_u64() == 0 + + f.skip_bits(40) + assert f.consumed_bit_length == 56 + assert f.remaining_bit_length == 8 + assert f.fetch_aligned_u8() == 0b00110100 + assert f.remaining_bit_length == 0 + + +# ================================================== USER CODE API ================================================== + + +def serialize(obj: Any) -> Iterable[memoryview]: + """ + 
def serialize(obj: Any) -> Iterable[memoryview]:
    """
    Constructs a serialized representation of the provided top-level object.
    The resulting serialized representation is padded to one byte in accordance with the Cyphal specification.
    The constructed serialized representation is returned as a sequence of byte-aligned fragments which must be
    concatenated in order to obtain the final representation.
    The objective of this model is to avoid copying data into a temporary buffer when possible.
    Each yielded fragment is of type :class:`memoryview` pointing to raw unsigned bytes.
    It is guaranteed that at least one fragment is always returned (which may be empty).

    :raises: :class:`TypeError` if the object is not a DSDL-generated serializable type.
        The exception is raised immediately at the call site, not deferred until iteration.
    """
    # Perform the capability check eagerly. If this function were a plain generator, the body
    # (and hence the TypeError for non-serializable inputs) would not execute until the caller
    # started iterating the result, which makes the error hard to attribute.
    try:
        fun = obj._serialize_
    except AttributeError:
        raise TypeError(f"Cannot serialize object of type {type(obj)}") from None

    def gen() -> Iterable[memoryview]:
        # TODO: update the Serializer class to emit an iterable of fragments.
        ser = Serializer.new(obj._EXTENT_BYTES_)
        fun(ser)
        yield ser.buffer.data

    return gen()
+ """ + try: + fun = dtype._deserialize_ # type: ignore + except AttributeError: + raise TypeError(f"Cannot deserialize using type {dtype}") from None + deserializer = Deserializer.new(fragmented_serialized_representation) + try: + return cast(T, fun(deserializer)) + except Deserializer.FormatError: + _logger.info( + "Invalid serialized representation of %s: %s", + get_model(dtype), + deserializer, + exc_info=True, + ) + return None + + +def get_model(class_or_instance: Any) -> pydsdl.CompositeType: + """ + Obtains a PyDSDL model of the supplied DSDL-generated class or its instance. + This is the inverse of :func:`get_class`. + """ + out = class_or_instance._MODEL_ + assert isinstance(out, pydsdl.CompositeType) + return out + + +def get_class(model: pydsdl.CompositeType) -> type: + """ + Returns a generated native class implementing the specified DSDL type represented by its PyDSDL model object. + Promotes the model to delimited type automatically if necessary. + This is the inverse of :func:`get_model`. + + :raises: + - :class:`ImportError` if the generated package or subpackage cannot be found. + + - :class:`AttributeError` if the package is found but it does not contain the requested type. + + - :class:`TypeError` if the requested type is found, but its model does not match the input argument. + This error may occur if the DSDL source has changed since the type was generated. + To fix this, regenerate the package and make sure that all components of the application use identical + or compatible DSDL source files. + """ + + def do_import(name_components: list[str]) -> Any: + mod = None + for comp in name_components: + name = (mod.__name__ + "." + comp) if mod else comp # type: ignore + try: + mod = importlib.import_module(name) + except ImportError: # We seem to have hit a reserved word; try with an underscore. 
def get_extent_bytes(class_or_instance: Any) -> int:
    """Returns the value of the type's ``_EXTENT_BYTES_`` attribute as a native int."""
    return int(class_or_instance._EXTENT_BYTES_)


def get_fixed_port_id(class_or_instance: Any) -> int | None:
    """
    Returns None if the supplied type has no fixed port-ID.
    """
    try:
        port_id = int(class_or_instance._FIXED_PORT_ID_)
    except (TypeError, AttributeError):  # Attribute missing or set to None.
        return None
    assert 0 <= port_id < 2**16
    return port_id


def get_attribute(obj: Any, name: str) -> Any:
    """
    DSDL type attributes whose names can't be represented in Python (such as ``def`` or ``type``)
    are suffixed with an underscore.
    This function allows the caller to read arbitrary attributes referring to them by their original
    DSDL names, e.g., ``def`` instead of ``def_``.

    This function behaves like :func:`getattr` if the attribute does not exist.
    """
    try:
        return getattr(obj, name)
    except AttributeError:
        pass  # Fall back to the stropped (underscore-suffixed) name.
    return getattr(obj, name + "_")


def set_attribute(obj: Any, name: str, value: Any) -> None:
    """
    DSDL type attributes whose names can't be represented in Python (such as ``def`` or ``type``)
    are suffixed with an underscore.
    This function allows the caller to assign arbitrary attributes referring to them by their original DSDL names,
    e.g., ``def`` instead of ``def_``.

    If the attribute does not exist, raises :class:`AttributeError`.
    """
    # We can't call setattr() blindly because if the attribute doesn't exist it would be
    # created, which would be disastrous. Probe with hasattr() first; the plain name wins
    # over the stropped one.
    for candidate in (name, name + "_"):
        if hasattr(obj, candidate):
            setattr(obj, candidate, value)
            return
    raise AttributeError(name)


def is_serializable(dtype: Any) -> bool:
    """
    Whether the passed type is a DSDL-generated serializable type.
    """
    required = ("_MODEL_", "_EXTENT_BYTES_", "_serialize_", "_deserialize_")
    return all(hasattr(dtype, attr) for attr in required)


def is_message_type(dtype: Any) -> bool:
    """
    Whether the passed type is generated from a DSDL message type.
    """
    # Service types are distinguished by their nested Request/Response classes.
    return is_serializable(dtype) and not (hasattr(dtype, "Request") or hasattr(dtype, "Response"))


def is_service_type(dtype: Any) -> bool:
    """
    Whether the passed type is generated from a DSDL service type, excluding its nested Request and Response types.
    """
    request_ok = is_serializable(getattr(dtype, "Request", None))
    response_ok = is_serializable(getattr(dtype, "Response", None))
    return hasattr(dtype, "_MODEL_") and request_ok and response_ok
def to_builtin(obj: object) -> dict[str, Any]:
    """
    Accepts a DSDL object (an instance of a Python class auto-generated from a DSDL definition),
    returns its value represented using only native built-in types: dict, list, bool, int, float, str.
    Ordering of dict elements is guaranteed to match the field ordering of the source definition.
    Keys of dicts representing DSDL objects use the original unstropped names from the source DSDL definition;
    e.g., ``if``, not ``if_``.

    This is intended for use with JSON, YAML, and other serialization formats.

    >>> import json
    >>> import uavcan.primitive.array
    >>> json.dumps(to_builtin(uavcan.primitive.array.Integer32_1_0([-123, 456, 0])))
    '{"value": [-123, 456, 0]}'
    >>> import uavcan.register
    >>> request = uavcan.register.Access_1_0.Request(
    ...     uavcan.register.Name_1_0('my.register'),
    ...     uavcan.register.Value_1_0(integer16=uavcan.primitive.array.Integer16_1_0([1, 2, +42, -10_000]))
    ... )
    >>> to_builtin(request)  # doctest: +NORMALIZE_WHITESPACE
    {'name': {'name': 'my.register'},
     'value': {'integer16': {'value': [1, 2, 42, -10000]}}}
    """
    model = get_model(obj)
    # Service types have no serialized form of their own, hence no built-in form either.
    if isinstance(model, pydsdl.ServiceType):  # pragma: no cover
        raise TypeError(
            f"Built-in form is not defined for service types. "
            f"Did you mean to use Request or Response? Input type: {model}"
        )
    result = _to_builtin_impl(obj, model)
    assert isinstance(result, dict)
    return result
def update_from_builtin(destination: T, source: Any) -> T:
    """
    Updates the provided DSDL object (an instance of a Python class auto-generated from a DSDL definition)
    with the values from a native representation, where DSDL objects are represented as dicts, arrays
    are lists, and primitives are represented as int/float/bool. This is the reverse of :func:`to_builtin`.
    Values that are not specified in the source are not updated (left at their original values),
    so an empty source will leave the input object unchanged.

    Source field names shall match the original unstropped names provided in the DSDL definition;
    e.g., `if`, not `if_`. If there is more than one variant specified for a union type, the last
    specified variant takes precedence.
    If the structure of the source does not match the destination object, the correct representation
    may be deduced automatically as long as it can be done unambiguously.

    :param destination: The object to update. The update will be done in-place. If you don't want the source
        object modified, clone it beforehand.

    :param source: The :class:`dict` instance containing the values to update the destination object with.

    :return: A reference to destination (not a copy).

    :raises: :class:`ValueError` if the provided source values cannot be applied to the destination object,
        or if the source contains fields that are not present in the destination object.
        :class:`TypeError` if an entity of the source cannot be converted into the type expected by the destination.

    >>> import uavcan.primitive.array
    >>> import uavcan.register
    >>> request = uavcan.register.Access_1_0.Request(
    ...     uavcan.register.Name_1_0('my.register'),
    ...     uavcan.register.Value_1_0(string=uavcan.primitive.String_1_0('Hello world!'))
    ... )
    >>> request
    uavcan.register.Access.Request...name='my.register'...value='Hello world!'...
    >>> update_from_builtin(request, {  # Switch the Value union from string to int16; keep the name unchanged.
    ...     'value': {
    ...         'integer16': {
    ...             'value': [1, 2, 42, -10000]
    ...         }
    ...     }
    ... })  # doctest: +NORMALIZE_WHITESPACE
    uavcan.register.Access.Request...name='my.register'...value=[ 1, 2, 42,-10000]...

    The following examples showcase positional initialization:

    >>> from uavcan.node import Heartbeat_1
    >>> update_from_builtin(Heartbeat_1(), [123456, 1, 2])  # doctest: +NORMALIZE_WHITESPACE
    uavcan.node.Heartbeat.1.0(uptime=123456,
                              health=uavcan.node.Health.1.0(value=1),
                              mode=uavcan.node.Mode.1.0(value=2),
                              vendor_specific_status_code=0)
    >>> update_from_builtin(Heartbeat_1(), 123456)  # doctest: +NORMALIZE_WHITESPACE
    uavcan.node.Heartbeat.1.0(uptime=123456,
                              health=uavcan.node.Health.1.0(value=0),
                              mode=uavcan.node.Mode.1.0(value=0),
                              vendor_specific_status_code=0)
    >>> update_from_builtin(Heartbeat_1(), [0, 0, 0, 0, 0])  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
      ...
    TypeError: ...

    >>> update_from_builtin(uavcan.primitive.array.Real64_1(), 123.456)  # doctest: +NORMALIZE_WHITESPACE
    uavcan.primitive.array.Real64.1.0(value=[123.456])
    >>> update_from_builtin(uavcan.primitive.array.Real64_1(), [123.456])  # doctest: +NORMALIZE_WHITESPACE
    uavcan.primitive.array.Real64.1.0(value=[123.456])
    >>> update_from_builtin(uavcan.primitive.array.Real64_1(), [123.456, -9])  # doctest: +NORMALIZE_WHITESPACE
    uavcan.primitive.array.Real64.1.0(value=[123.456, -9.   ])

    >>> update_from_builtin(uavcan.register.Access_1_0.Request(), ["X", {"integer8": 99}])  # Same as the next one!
    uavcan.register.Access.Request...name='X'...value=[99]...
    >>> update_from_builtin(uavcan.register.Access_1_0.Request(), {'name': 'X', 'value': {'integer8': {'value': [99]}}})
    uavcan.register.Access.Request...name='X'...value=[99]...
    """
    _logger.debug("update_from_builtin: destination/source on the next lines:\n%r\n%r", destination, source)
    model = get_model(destination)
    if isinstance(model, pydsdl.ServiceType):  # pragma: no cover
        raise TypeError(
            f"Built-in form is not defined for service types. "
            f"Did you mean to use Request or Response? Input type: {model}"
        )
    fields = model.fields_except_padding

    # UX improvement: https://github.com/OpenCyphal/pycyphal/issues/147 -- match the source against the data type.
    # A non-dict source is interpreted positionally against the field list.
    if not isinstance(source, dict):
        if not isinstance(source, (list, tuple)):  # A bare scalar initializes the first field.
            source = (source,)
        can_propagate = fields and isinstance(fields[0].data_type, (pydsdl.ArrayType, pydsdl.CompositeType))
        too_many_values = len(source) > (1 if isinstance(model.inner_type, pydsdl.UnionType) else len(fields))
        if can_propagate and too_many_values:
            # The values don't fit here, but the first field is a container: push the whole
            # source one level down and let the recursion sort it out.
            _logger.debug(
                "update_from_builtin: %d source values cannot be applied to %s but the first field accepts "
                "positional initialization -- propagating down",
                len(source),
                type(destination).__name__,
            )
            source = [source]
        if len(source) > len(fields):
            raise TypeError(
                f"Cannot apply {len(source)} values to {len(fields)} fields in {type(destination).__name__}"
            )
        # Convert the positional form to the canonical dict form and recurse once.
        source = {fld.name: val for fld, val in zip(fields, source)}
        return update_from_builtin(destination, source)

    source = dict(source)  # Create copy to prevent mutation of the original

    for fld in fields:
        field_type = fld.data_type
        try:
            field_value = source.pop(fld.name)
        except LookupError:
            continue  # No value specified, keep original value

        if isinstance(field_type, pydsdl.CompositeType):
            field_obj = get_attribute(destination, fld.name)
            if field_obj is None:  # Oh, this is a union
                field_obj = get_class(field_type)()  # The variant was not selected, construct a default
                set_attribute(destination, fld.name, field_obj)  # Switch the union to the new variant
            update_from_builtin(field_obj, field_value)

        elif isinstance(field_type, pydsdl.ArrayType):
            element_type = field_type.element_type
            if isinstance(element_type, pydsdl.PrimitiveType):
                set_attribute(destination, fld.name, field_value)
            elif isinstance(element_type, pydsdl.CompositeType):
                element_class = get_class(element_type)
                set_attribute(destination, fld.name, [update_from_builtin(element_class(), s) for s in field_value])
            else:
                assert False, f"Unexpected array element type: {element_type!r}"

        elif isinstance(field_type, pydsdl.PrimitiveType):
            set_attribute(destination, fld.name, field_value)

        else:
            assert False, f"Unexpected field type: {field_type!r}"

    # Anything left over names a field that does not exist in the destination -- reject.
    if source:
        raise ValueError(f"No such fields in {model}: {list(source.keys())}")

    return destination
+{%- endif %} diff --git a/src/nunavut/lang/py/templates/ServiceType.j2 b/src/nunavut/lang/py/templates/ServiceType.j2 new file mode 100644 index 00000000..4350725c --- /dev/null +++ b/src/nunavut/lang/py/templates/ServiceType.j2 @@ -0,0 +1,27 @@ +{#- + # Copyright (c) 2019 OpenCyphal + # This software is distributed under the terms of the MIT License. + # Author: Pavel Kirienko +-#} + +{% extends "base.j2" -%} +{%- block contents -%} + +# noinspection PyPep8Naming, SpellCheckingInspection, DuplicatedCode +class {{ T|short_reference_name }}: + {{ data_schema('Request', T.request_type, T|short_reference_name)|indent }} + + {{ data_schema('Response', T.response_type, T|short_reference_name)|indent }} + + def __repr__(self) -> str: + return '{{ T }}()' + + {% if T.has_fixed_port_id %} + _FIXED_PORT_ID_ = {{ T.fixed_port_id|int }} + {%- endif %} + _MODEL_: _pydsdl_.ServiceType = _restore_constant_( + {{ T | pickle | indent(8) }} + ) + assert isinstance(_MODEL_, _pydsdl_.ServiceType) + +{%- endblock -%} diff --git a/src/nunavut/lang/py/templates/StructureType.j2 b/src/nunavut/lang/py/templates/StructureType.j2 new file mode 100644 index 00000000..232ba1bc --- /dev/null +++ b/src/nunavut/lang/py/templates/StructureType.j2 @@ -0,0 +1,12 @@ +{#- + # Copyright (c) 2019 OpenCyphal + # This software is distributed under the terms of the MIT License. + # Author: Pavel Kirienko +-#} + +{% extends "base.j2" %} +{%- block contents -%} + +{{ data_schema(T|short_reference_name, T) }} + +{%- endblock -%} diff --git a/src/nunavut/lang/py/templates/UnionType.j2 b/src/nunavut/lang/py/templates/UnionType.j2 new file mode 100644 index 00000000..a7fce23c --- /dev/null +++ b/src/nunavut/lang/py/templates/UnionType.j2 @@ -0,0 +1,7 @@ +{#- + # Copyright (c) 2019 OpenCyphal + # This software is distributed under the terms of the MIT License. 
+ # Author: Pavel Kirienko +-#} + +{%- include 'StructureType.j2' -%} diff --git a/src/nunavut/lang/py/templates/__init__.py b/src/nunavut/lang/py/templates/__init__.py index 39ac0579..d045ff71 100644 --- a/src/nunavut/lang/py/templates/__init__.py +++ b/src/nunavut/lang/py/templates/__init__.py @@ -1,8 +1,7 @@ -# # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# Copyright (C) 2018-2021 OpenCyphal Development Team +# Copyright (C) OpenCyphal Development Team # This software is distributed under the terms of the MIT License. -# + """ Contains the Jinja templates to generate Py headers. """ diff --git a/src/nunavut/lang/py/templates/base.j2 b/src/nunavut/lang/py/templates/base.j2 new file mode 100644 index 00000000..d0592c22 --- /dev/null +++ b/src/nunavut/lang/py/templates/base.j2 @@ -0,0 +1,428 @@ +{#- + # Copyright (c) 2019 OpenCyphal + # This software is distributed under the terms of the MIT License. + # Author: Pavel Kirienko +-#} +# AUTOGENERATED, DO NOT EDIT. +# +# Source file: +# {{ T.source_file_path }} +# +# Generated at: {{ now_utc }} UTC +# Is deprecated: {{ T.deprecated and 'yes' or 'no' }} +# Fixed port ID: {{ T.fixed_port_id }} +# Full name: {{ T.full_name }} +# Version: {{ T.version.major }}.{{ T.version.minor }} +# +# pylint: skip-file +# mypy: warn_unused_ignores=False + +{% assert options.enable_serialization_asserts -%} +{#- These templates always generate serialization asserts. -#} + +{% set ARRAY_PRINT_SUMMARIZATION_THRESHOLD = 100 -%} +{#- How many elements in the array trigger summarization rather than full output. + #- Summarization replaces middle elements with an ellipsis. 
-#} + +from __future__ import annotations +from nunavut_support import Serializer as _Serializer_, Deserializer as _Deserializer_, API_VERSION as _NSAPIV_ +import numpy as _np_ +from numpy.typing import NDArray as _NDArray_ +import pydsdl as _pydsdl_ +{%- if T.deprecated %} +import warnings as _warnings_ +{%- endif -%} +{%- for n in T|imports %} +import {{ n }} +{%- endfor %} + +if _NSAPIV_[0] != {{ nunavut.support.version[0] }}: + raise RuntimeError( + f"Incompatible Nunavut support API version: support { _NSAPIV_ }, package {{ nunavut.support.version }}" + ) + +{%- from 'serialization.j2' import serialize -%} +{%- from 'deserialization.j2' import deserialize -%} + + +{#- + # FIELD TYPE ANNOTATIONS. +-#} +{%- macro strict_type_annotation(t) -%} + {%- if t is BooleanType -%} bool + {%- elif t is IntegerType -%} int + {%- elif t is FloatType -%} float + {%- elif t is ArrayType -%} _NDArray_[{{ t.element_type|numpy_scalar_type }}] + {%- elif t is CompositeType -%} {{ t|full_reference_name }} + {%- else -%}{% assert False %} + {%- endif -%} +{%- endmacro -%} + +{%- macro relaxed_type_annotation(t) -%} + {%- if t is BooleanType -%} bool + {%- elif t is IntegerType -%} int | {{ t|numpy_scalar_type }} + {%- elif t is FloatType -%} int | float | {{ t|numpy_scalar_type }} + {%- elif t is CompositeType -%} {{ t|full_reference_name }} + {%- elif t is ArrayType -%} + {%- if (t.element_type is UnsignedIntegerType) and t.element_type.bit_length <= 8 -%} + _NDArray_[{{ t.element_type|numpy_scalar_type }}] | list[int] | memoryview | bytes | bytearray {% if t.string_like -%}| str{%- endif -%} + {%- else -%} + _NDArray_[{{ t.element_type|numpy_scalar_type }}] | list[{{ strict_type_annotation(t.element_type) }}] + {%- endif -%} + {%- else -%}{% assert False %} + {%- endif -%} +{%- endmacro -%} + + +{#- + # ARRAY ASSIGNMENT BLOCK. + # Validates the type and dimensionality of the input array, and converts it into the proper type as necessary. 
+ # Emits post-assignment invariant checks to ensure correct behavior of the generated code. +-#} +{%- macro assign_array(f, src) -%} + {%- set t = f.data_type -%} + {%- if t is FixedLengthArrayType -%} {%- set cmp = '==' -%} + {%- elif t is VariableLengthArrayType -%} {%- set cmp = '<=' -%} + {%- else -%}{%- assert False -%} + {%- endif -%} + + {%- if t.string_like -%} {#- DSDL uses UTF-8, which is the default in Python. -#} + {{ src }} = {{ src }}.encode() if isinstance({{ src }}, str) else {{ src }} # Implicit string encoding + {% endif -%} + + {%- if t.element_type is UnsignedIntegerType and t.element_type.bit_length <= 8 -%} + if isinstance({{ src }}, (bytes, bytearray)) and len({{ src }}) {{ cmp }} {{ t.capacity }}: + # Fast zero-copy initialization from buffer. Necessary when dealing with images, point clouds, etc. + # Mutability will be inherited; e.g., bytes - immutable, bytearray - mutable. + self._{{ f|id }} = _np_.frombuffer({{ src }}, {{ t.element_type|numpy_scalar_type }}) # type: ignore + el {#- Concatenated with the "if" below -#} + {% endif -%} + + if isinstance({{ src }}, _np_.ndarray) + {#- #} and {{ src }}.dtype == {{ t.element_type|numpy_scalar_type }} + {#- #} and {{ src }}.ndim == 1 + {#- #} and {{ src }}.size {{ cmp }} {{ t.capacity }}: # type: ignore + # Fast binding if the source array has the same type and dimensionality. Beware of the shared reference. + self._{{ f|id }} = {{ src }} + else: + # Last resort, slow construction of a new array. New memory may be allocated. 
+ {{ src }} = _np_.array({{ src }}, {{ t.element_type|numpy_scalar_type }}).flatten() + if not {{ src }}.size {{ cmp }} {{ t.capacity }}: # Length cannot be checked before casting and flattening + raise ValueError(f'{{ f.name }}: invalid array length: not { {{- src }}.size} {{ cmp }} {{ t.capacity }}') + self._{{ f|id }} = {{ src }} + assert isinstance(self._{{ f|id }}, _np_.ndarray) + assert self._{{ f|id }}.dtype == {{ t.element_type|numpy_scalar_type }} # type: ignore + assert self._{{ f|id }}.ndim == 1 + assert len(self._{{ f|id }}) {{ cmp }} {{ t.capacity }} +{%- endmacro -%} + + +{#- + # FIELD TO STRING CONVERSION. + # Emits an expression that constructs a string-printable representation of the field. + # The resulting expression shall be wrapped into str() or fed into '%s'. +-#} +{%- macro printable_field_representation(f) -%} + {%- if f.data_type is ArrayType -%} + {%- if f.data_type.string_like -%} + repr(bytes(self.{{ f|id }}))[1:] + {%- else -%} + _np_.array2string(self.{{ f|id }}, separator=',', edgeitems=10, {# -#} + threshold={{ ARRAY_PRINT_SUMMARIZATION_THRESHOLD }}, {# -#} + max_line_width={{ ARRAY_PRINT_SUMMARIZATION_THRESHOLD * 10000 }}) + {%- endif -%} + {%- else -%} + self.{{ f|id }} + {%- endif -%} +{%- endmacro -%} + + +{#- + # MAIN CODE GENERATION MACRO. + # Accepts the name of the generated type and its DSDL type descriptor object of type pydsdl.CompositeType. +-#} +{%- macro data_schema(name, type, parent_class_name=None) -%} +{%- set full_class_name = ((parent_class_name + '.') if parent_class_name else '') + name -%} +# noinspection PyUnresolvedReferences, PyPep8, PyPep8Naming, SpellCheckingInspection, DuplicatedCode +class {{ name }}: + """ + Generated property settings use relaxed type signatures, accepting a large variety of + possible representations of the value, which are automatically converted to a well-defined + internal representation. 
When accessing a property, this strict well-defined internal + representation is always returned. The implicit strictification enables more precise static + type analysis. + + The value returned by the __repr__() method may be invariant to some of the field values, + and its format is not guaranteed to be stable. Therefore, the returned string representation + can be used only for displaying purposes; any kind of automation build on top of that will + be fragile and prone to mismaintenance. + """ +{#- + # CONSTANTS +-#} +{%- for c in type.constants %} + {%- set target -%} + {{ c|id }}: {{ ''.ljust(type.constants|longest_id_length - c|id|length) }}{{ strict_type_annotation(c.data_type) }} + {%- endset %} + {%- if c.data_type is BooleanType %} + {{ target }} = {{ c.value.native_value }} + + {%- elif c.data_type is IntegerType %} + {{ target }} = {{ c.value.as_native_integer() }} + + {%- elif c.data_type is FloatType %} + {{ target }} = {{ c.value.native_value.numerator }} / {{ c.value.native_value.denominator }} + + {%- else -%}{%- assert False -%} + {%- endif %} + {{- '\n' if loop.last else '' -}} +{%- endfor %} + def __init__(self + {%- if type.inner_type is UnionType -%}, *{%- endif -%} + {%- for f in type.fields_except_padding -%} + , + {{ f|id }}: {{ ''.ljust(type.fields|longest_id_length - f|id|length) -}} + None | {{ relaxed_type_annotation(f.data_type) }} = None + {%- endfor -%} + ) -> None: + """ + {{ type.full_name }}.{{ type.version.major }}.{{ type.version.minor }} + Raises ValueError if any of the primitive values are outside the permitted range, regardless of the cast mode. + {%- if type.inner_type is UnionType %} + If no parameters are provided, the first field will be default-initialized and selected. + If one parameter is provided, it will be used to initialize and select the field under the same name. + If more than one parameter is provided, a ValueError will be raised. 
+ {%- endif %} +{%- for f in type.fields_except_padding %} + :param {{ f|id }}: {{ ''.ljust(type.fields|longest_id_length - f|id|length) }}{{ f }} +{%- endfor %} + """ +{%- if type.deprecated %} + _warnings_.warn('Data type {{ type }} is deprecated', DeprecationWarning) +{% endif -%} + +{#- + # FIELD INITIALIZATION +-#} +{%- if type.inner_type is not UnionType -%} + {%- for f in type.fields_except_padding %} + self._{{ f|id }}: {{ ''.ljust(type.fields|longest_id_length - f|id|length) -}} + {{- strict_type_annotation(f.data_type) }} + {{- '\n' if loop.last else '' -}} + {%- endfor %} + {%- for f in type.fields_except_padding %} + {%- if f.data_type is BooleanType %} + self.{{ f|id }} = {{ f|id }} if {{ f|id }} is not None else False + + {%- elif f.data_type is IntegerType %} + self.{{ f|id }} = {{ f|id }} if {{ f|id }} is not None else 0 # type: ignore + + {%- elif f.data_type is FloatType %} + self.{{ f|id }} = {{ f|id }} if {{ f|id }} is not None else 0.0 # type: ignore + + {%- elif f.data_type is FixedLengthArrayType %} + if {{ f|id }} is None: + {%- if f.data_type.element_type is CompositeType %} + self.{{ f|id }} = _np_.array([{{ f.data_type.element_type|full_reference_name }}() {# -#} + for _ in range({{ f.data_type.capacity }})], {# -#} + {{ f.data_type.element_type|numpy_scalar_type }}) + {%- else %} + self.{{ f|id }} = _np_.zeros({{ f.data_type.capacity }}, {# -#} + {{ f.data_type.element_type|numpy_scalar_type }}) + {%- endif %} + else: + {{ assign_array(f, f|id) | indent(8) }} + + {%- elif f.data_type is VariableLengthArrayType %} + if {{ f|id }} is None: + self.{{ f|id }} = _np_.array([], {{ f.data_type.element_type|numpy_scalar_type }}) + else: + {{ assign_array(f, f|id) | indent(8) }} + + {%- elif f.data_type is CompositeType %} + if {{ f|id }} is None: + self.{{ f|id }} = {{ f.data_type|full_reference_name }}() + elif isinstance({{ f|id }}, {{ f.data_type|full_reference_name }}): + self.{{ f|id }} = {{ f|id }} + else: + raise ValueError(f'{{ f|id }}: 
expected {{ f.data_type|full_reference_name }} ' + f'got {type({{ f|id }}).__name__}') + + {%- else -%}{%- assert False -%} + {%- endif %} + {% else %} + pass +{# #} + {%- endfor %} +{%- else %} {#- IS UNION (guaranteed to contain at least 2 fields none of which are padding) #} + {%- for f in type.fields %} + self._{{ f|id }}: {{ ''.ljust(type.fields|longest_id_length - f|id|length) -}} + None | {{ strict_type_annotation(f.data_type) }} = None + {%- endfor %} + _init_cnt_: int = 0 + {% for f in type.fields %} + if {{ f|id }} is not None: + _init_cnt_ += 1 + self.{{ f|id }} = {{ f|id }} # type: ignore + {% endfor %} + if _init_cnt_ == 0: + {%- set f = type.fields[0] -%} + {%- if f.data_type is BooleanType %} + self.{{ f|id }} = False + + {%- elif f.data_type is IntegerType %} + self.{{ f|id }} = 0 + + {%- elif f.data_type is FloatType %} + self.{{ f|id }} = 0.0 + + {%- elif f.data_type is FixedLengthArrayType %} + {%- if f.data_type.element_type is CompositeType %} + self.{{ f|id }} = _np_.array([{{ f.data_type.element_type|full_reference_name }}() {# -#} + for _ in range({{ f.data_type.capacity }})], {# -#} + {{ f.data_type.element_type|numpy_scalar_type }}) + {%- else %} + self.{{ f|id }} = _np_.zeros({{ f.data_type.capacity }}, {# -#} + {{ f.data_type.element_type|numpy_scalar_type }}) + {%- endif %} + + {%- elif f.data_type is VariableLengthArrayType %} + self.{{ f|id }} = _np_.array([], {{ f.data_type.element_type|numpy_scalar_type }}) + + {%- elif f.data_type is CompositeType %} + self.{{ f|id }} = {{ f.data_type|full_reference_name }}() + + {%- else -%}{%- assert False -%} + {%- endif %} # Default initialization + elif _init_cnt_ == 1: + pass # A value is already assigned, nothing to do + else: + raise ValueError(f'Union cannot hold values of more than one field') +{% endif %} + +{#- + # FIELD ACCESSORS AND MUTATORS +-#} +{%- for f in type.fields_except_padding %} + @property + def {{ f|id }}(self) -> {{ "None | " * (type.inner_type is UnionType) }}{{ 
strict_type_annotation(f.data_type) }}: + """ + {{ f }} + {%- if f.data_type is VariableLengthArrayType and f.data_type.string_like %} + DSDL does not support strings natively yet. To interpret this array as a string, + use tobytes() to convert the NumPy array to bytes, and then decode() to convert bytes to string: + .{{ f|id }}.tobytes().decode() + When assigning a string to this property, no manual conversion is necessary (it will happen automatically). + {%- endif %} + The setter raises ValueError if the supplied value exceeds the valid range or otherwise inapplicable. + """ + return self._{{ f|id }} + + @{{ f|id }}.setter + def {{ f|id }}(self, x: {{ relaxed_type_annotation(f.data_type) }}) -> None: + {%- if f.data_type is BooleanType %} + self._{{ f|id }} = bool(x) # Cast to bool implements saturation + + {%- elif f.data_type is IntegerType %} + """Raises ValueError if the value is outside of the permitted range, regardless of the cast mode.""" + x = int(x) + if {{ f.data_type.inclusive_value_range.min }} <= x <= {{ f.data_type.inclusive_value_range.max }}: + self._{{ f|id }} = x + else: + raise ValueError(f'{{ f|id }}: value {x} is not in [{{ f.data_type.inclusive_value_range.min }}, {# -#} + {{ f.data_type.inclusive_value_range.max }}]') + + {%- elif f.data_type is FloatType %} + """Raises ValueError if the value is finite and outside of the permitted range, regardless of the cast mode.""" + {#- We do not emit range check for float64 because its range matches that of the native Python's float. 
#} + {%- if f.data_type.bit_length < 64 %} + x = float(x) + in_range = {{ f.data_type.inclusive_value_range.min }}.0 <= x <= {{ f.data_type.inclusive_value_range.max }}.0 + if in_range or not _np_.isfinite(x): + self._{{ f|id }} = x + else: + raise ValueError(f'{{ f|id }}: value {x} is not in [{{ f.data_type.inclusive_value_range.min }}, {# -#} + {{ f.data_type.inclusive_value_range.max }}]') + {%- else %} + self._{{ f|id }} = float(x) # Range check not required + {%- endif %} + + {%- elif f.data_type is ArrayType %} + {{ assign_array(f, 'x') | indent(4) }} + + {%- elif f.data_type is CompositeType %} + if isinstance(x, {{ f.data_type|full_reference_name }}): + self._{{ f|id }} = x + else: + raise ValueError(f'{{ f|id }}: expected {{ f.data_type|full_reference_name }} got {type(x).__name__}') + + {%- else -%}{%- assert False -%} + {%- endif %} + {%- if type.inner_type is UnionType %} + {%- for z in type.fields if z.name != f.name %} + self._{{ z|id }} = None + {%- endfor %} + {%- endif %} +{% endfor -%} +{# + # SERIALIZATION METHODS + #} + # noinspection PyProtectedMember + def _serialize_(self, _ser_: _Serializer_) -> None: + {{ serialize(type) | remove_blank_lines | indent }} + + # noinspection PyProtectedMember + @staticmethod + def _deserialize_(_des_: _Deserializer_) -> {{ full_class_name }}: + {{ deserialize(type, full_class_name) | remove_blank_lines | indent }} + assert isinstance(self, {{ full_class_name }}) + return self +{# + # PYTHON DATA MODEL + #} + def __repr__(self) -> str: +{%- if type.inner_type is not UnionType %} + _o_0_ = ', '.join([ + {%- for f in type.fields_except_padding %} + '{{ f.name }}=%s' % {{ printable_field_representation(f) }}, + {%- endfor %} + ]) +{%- else %} {#- UNION #} + _o_0_ = '(MALFORMED UNION)' + {%- for f in type.fields %} + if self.{{ f|id }} is not None: + _o_0_ = '{{ f.name }}=%s' % {{ printable_field_representation(f) }} + {%- endfor %} +{%- endif %} + return f'{{ type.full_name }}.{{ type.version.major }}.{{ 
type.version.minor }}({_o_0_})'
+{#
+ # PYDSDL TYPE DESCRIPTOR
+ #}
+    {%- if T.has_fixed_port_id %}
+    _FIXED_PORT_ID_ = {{ T.fixed_port_id|int }}
+    {%- endif %}
+    {%- assert type.extent % 8 == 0 %}
+    _EXTENT_BYTES_ = {{ type.extent // 8 }}
+
+    {% set meta_type = type.__class__.__name__ -%}
+    # The big, scary blob of opaque data below contains a serialized PyDSDL object with the metadata of the
+    # DSDL type this class is generated from. It is needed for reflection and runtime introspection.
+    # Eventually we should replace this with ad-hoc constants such that no blob is needed and the generated code
+    # is not dependent on PyDSDL.
+    _MODEL_: _pydsdl_.{{ meta_type }} = _restore_constant_(
+        {{ type | pickle | indent(8) }}
+    )
+    assert isinstance(_MODEL_, _pydsdl_.{{ meta_type }})
+{%- endmacro -%}
+
+{#-
+ # DEFINITIONS.
+ #}
+
+
+def _restore_constant_(encoded_string: str) -> object:
+    import pickle, gzip, base64
+    return pickle.loads(gzip.decompress(base64.b85decode(encoded_string)))
+
+
+{% block contents %}{% endblock %}
diff --git a/src/nunavut/lang/py/templates/deserialization.j2 b/src/nunavut/lang/py/templates/deserialization.j2
new file mode 100644
index 00000000..d8213bad
--- /dev/null
+++ b/src/nunavut/lang/py/templates/deserialization.j2
@@ -0,0 +1,144 @@
+{#-
+ # Copyright (c) 2019 OpenCyphal
+ # This software is distributed under the terms of the MIT License.
+ # Author: Pavel Kirienko +-#} + +{% macro deserialize(self, self_type_name) -%} + assert _des_.consumed_bit_length % 8 == 0, 'Deserializer is not aligned' + _base_offset_ = _des_.consumed_bit_length + {% set t = self.inner_type %} +{% if t is StructureType %} + {% set field_ref_map = {} %} + {% for f, offset in t.iterate_fields_with_offsets() %} + {% if f is not padding %} + {% set field_ref = 'f'|to_template_unique_name %} + {% do field_ref_map.update({f: field_ref}) %} + # Temporary {{ field_ref }} holds the value of "{{ f.name }}" + {{ _deserialize_any(f.data_type, field_ref, offset) }} + {% else %} + {{ _deserialize_any(f.data_type, '[void field does not require a reference]', offset) }} + {% endif %} + {% endfor %} + {% set assignment_root -%} + self = {{ self_type_name }}( + {%- endset %} + {{ assignment_root }} + {% for f in t.fields_except_padding %} + {{ f|id }}={{ field_ref_map[f] }} + {{- ')' if loop.last else (',\n' + ' ' * (4 + assignment_root|length)) -}} + {% else %} + ) + {% endfor %} +{% elif t is UnionType %} + {% set tag_ref = 'tag'|to_template_unique_name %} + {{ _deserialize_integer(t.tag_field_type, tag_ref, 0|bit_length_set) }} + {% for f, offset in t.iterate_fields_with_offsets() %} + {# We generate new temporary for each variant to prevent MyPy from complaining. #} + {% set field_ref = 'uni'|to_template_unique_name %} + {{ 'if' if loop.first else 'elif' }} {{ tag_ref }} == {{ loop.index0 }}: + {{ _deserialize_any(f.data_type, field_ref, offset)|indent }} + self = {{ self_type_name }}({{ f|id }}={{ field_ref }}) + {% endfor %} + else: + raise _des_.FormatError(f'{{ t }}: Union tag value { {{- tag_ref -}} } is invalid') +{% else %}{% assert False %}{# Delimited type is not expected in this context. 
#} +{% endif %} + _des_.pad_to_alignment({{ self.alignment_requirement }}) + assert {{ t.bit_length_set.min }} <= (_des_.consumed_bit_length - _base_offset_) <= {{ t.bit_length_set.max }}, \ + 'Bad deserialization of {{ self }}' +{%- endmacro %} + + +{% macro _deserialize_integer(t, ref, offset) %} +{% if t.standard_bit_length and offset.is_aligned_at_byte() %} + {{ ref }} = _des_.fetch_aligned_{{ 'i' if t is SignedIntegerType else 'u' }}{{ t.bit_length }}() +{% else %} + {% set signedness = 'signed' if t is SignedIntegerType else 'unsigned' %} + {{ ref }} = _des_.fetch_{{ offset|alignment_prefix }}_{{ signedness }}({{ t.bit_length }}) +{% endif %} +{% endmacro %} + + +{% macro _deserialize_fixed_length_array(t, ref, offset) %} +{% if t.element_type is BooleanType %} + {{ ref }} = _des_.fetch_{{ offset|alignment_prefix }}_array_of_bits({{ t.capacity }}) +{% elif t.element_type is PrimitiveType and t.element_type.standard_bit_length %} + {{ ref }} = _des_.fetch_{{ offset|alignment_prefix -}} + _array_of_standard_bit_length_primitives({{ t.element_type|numpy_scalar_type }}, {{ t.capacity }}) +{% else %} + {# Element offset is the superposition of each individual element offsets plus the array's own offset. + # For example, an array like uint8[3] offset by 16 bits would have its element_offset = {16, 24, 32}. + # We can also unroll element deserialization for small arrays (e.g., below ~10 elements) to take advantage of + # spurious alignment of elements but the benefit of such optimization is believed to be negligible. 
#} + {% set element_offset = offset + t.element_type.bit_length_set.repeat_range(t.capacity - 1) %} + {% set element_ref = 'e'|to_template_unique_name %} + {% set index_ref = 'i'|to_template_unique_name %} + {{ ref }} = _np_.empty({{ t.capacity }}, {{ t.element_type|numpy_scalar_type }}) # type: ignore + for {{ index_ref }} in range({{ t.capacity }}): + {{ _deserialize_any(t.element_type, element_ref, element_offset)|indent }} + {{ ref }}[{{ index_ref }}] = {{ element_ref }} +{% endif %} + assert len({{ ref }}) == {{ t.capacity }}, '{{ t }}' +{% endmacro %} + + +{% macro _deserialize_variable_length_array(t, ref, offset) %} + # Length field byte-aligned: {{ offset.is_aligned_at_byte() }}; {# -#} + all elements byte-aligned: {{ (offset + t.bit_length_set).is_aligned_at_byte() }}. + {% set length_ref = 'len'|to_template_unique_name %} + {{ _deserialize_integer(t.length_field_type, length_ref, offset) }} + assert {{ length_ref }} >= 0 + if {{ length_ref }} > {{ t.capacity }}: + raise _des_.FormatError(f'Variable array length prefix { {{- length_ref -}} } > {{ t.capacity }}') +{% if t.element_type is BooleanType %} + {{ ref }} = _des_.fetch_{{ (offset + t.length_field_type.bit_length)|alignment_prefix -}} + _array_of_bits({{ length_ref }}) +{% elif t.element_type is PrimitiveType and t.element_type.standard_bit_length %} + {{ ref }} = _des_.fetch_{{ (offset + t.length_field_type.bit_length)|alignment_prefix -}} + _array_of_standard_bit_length_primitives({{ t.element_type|numpy_scalar_type }}, {{ length_ref }}) +{% else %} + {% set element_ref = 'e'|to_template_unique_name %} + {% set index_ref = 'i'|to_template_unique_name %} + {{ ref }} = _np_.empty({{ length_ref }}, {{ t.element_type|numpy_scalar_type }}) # type: ignore + for {{ index_ref }} in range({{ length_ref }}): + {{ _deserialize_any(t.element_type, element_ref, offset + t.bit_length_set)|indent }} + {{ ref }}[{{ index_ref }}] = {{ element_ref }} +{% endif %} + assert len({{ ref }}) <= {{ t.capacity }}, '{{ t 
}}' +{% endmacro %} + + +{% macro _deserialize_any(t, ref, offset) %} + {% if t.alignment_requirement > 1 %} + _des_.pad_to_alignment({{ t.alignment_requirement }}) + {% endif %} + {%- if t is VoidType -%} _des_.skip_bits({{ t.bit_length }}) + {%- elif t is BooleanType -%} {{ ref }} = _des_.fetch_unaligned_bit() + {%- elif t is IntegerType -%} {{ _deserialize_integer(t, ref, offset) }} + {%- elif t is FloatType -%} {{ ref }} = _des_.fetch_{{ offset|alignment_prefix }}_f{{ t.bit_length }}() + {%- elif t is FixedLengthArrayType -%} {{ _deserialize_fixed_length_array(t, ref, offset) }} + {%- elif t is VariableLengthArrayType -%}{{ _deserialize_variable_length_array(t, ref, offset) }} + {%- elif t is CompositeType -%} + {% if t is DelimitedType %} + # Delimited deserialization of {{ t }}, extent {{ t.extent }} + _dh_ = _des_.fetch_aligned_u32() # Read the delimiter header. + if _dh_ * 8 > _des_.remaining_bit_length: + raise _des_.FormatError(f'Delimiter header specifies {_dh_ * 8} bits, ' + f'but the remaining length is only {_des_.remaining_bit_length} bits') + _nested_ = _des_.fork_bytes(_dh_) + _des_.skip_bits(_dh_ * 8) + {{ ref }} = {{ t|full_reference_name }}._deserialize_(_nested_) + del _nested_ + {% else %} + {{ ref }} = {{ t|full_reference_name }}._deserialize_(_des_) + {% endif %} + {%- else %}{% assert False %} + {%- endif %} + {% if t is CompositeType %} + assert _des_.consumed_bit_length % {{ t.alignment_requirement }} == 0, 'Nested object alignment error' + {% endif %} + {% if t is not CompositeType and t.alignment_requirement > 1 %} + _des_.pad_to_alignment({{ t.alignment_requirement }}) + {% endif %} +{% endmacro %} diff --git a/src/nunavut/lang/py/templates/serialization.j2 b/src/nunavut/lang/py/templates/serialization.j2 new file mode 100644 index 00000000..bc909d56 --- /dev/null +++ b/src/nunavut/lang/py/templates/serialization.j2 @@ -0,0 +1,171 @@ +{#- + # Copyright (c) 2019 OpenCyphal + # This software is distributed under the terms of the MIT 
License. + # Author: Pavel Kirienko +-#} + +{% macro serialize(self) -%} + assert _ser_.current_bit_length % 8 == 0, 'Serializer is not aligned' + _base_offset_ = _ser_.current_bit_length + {% set t = self.inner_type %} +{% if t is StructureType %} + {% for f, offset in t.iterate_fields_with_offsets() %} + {{ _serialize_any(f.data_type, 'self.' + (f|id), offset) }} + {% endfor %} +{% elif t is UnionType %} + {% for f, offset in t.iterate_fields_with_offsets() %} + {% set field_ref = 'self.' + (f|id) %} + {{ 'if' if loop.first else 'elif' }} {{ field_ref }} is not None: # Union tag {{ loop.index0 }} + {{ _serialize_integer(t.tag_field_type, loop.index0|string, 0|bit_length_set)|indent }} + {{ _serialize_any(f.data_type, field_ref, offset)|indent }} + {% endfor %} + else: + raise RuntimeError('Malformed union {{ t }}') +{% else %}{% assert False %}{# Delimited type is not expected in this context. #} +{% endif %} + _ser_.pad_to_alignment({{ self.alignment_requirement }}) + assert {{ t.bit_length_set.min }} <= (_ser_.current_bit_length - _base_offset_) <= {{ t.bit_length_set.max }}, \ + 'Bad serialization of {{ self }}' +{%- endmacro %} + + +{% macro _serialize_integer(t, ref, offset) %} +{% if t is saturated %} {# Note that value ranges are internally represented as rationals. #} + {% set ref = 'max(min(%s, %s), %s)'|format(ref, t.inclusive_value_range.max, t.inclusive_value_range.min) %} +{% endif %} +{% if t.standard_bit_length and offset.is_aligned_at_byte() %} + _ser_.add_aligned_{{ 'i' if t is SignedIntegerType else 'u' }}{{ t.bit_length }}({{ ref }}) +{% else %} + {% set signedness = 'signed' if t is SignedIntegerType else 'unsigned' %} + _ser_.add_{{ offset|alignment_prefix }}_{{ signedness }}({{ ref }}, {{ t.bit_length }}) +{% endif %} +{% endmacro %} + + +{% macro _serialize_float(t, ref, offset) %} +{% set fun -%} + _ser_.add_{{ offset|alignment_prefix }}_f{{ t.bit_length }} +{%- endset %} +{# Note that value ranges are internally represented as rationals. 
#} +{% if t is saturated %} + {# We do not emit saturation code for float64 because its range matches that of the native Python's float. #} + {% if t.bit_length < 64 %} + if _np_.isfinite({{ ref }}): + if {{ ref }} > {{ t.inclusive_value_range.max }}.0: + {{ fun }}({{ t.inclusive_value_range.max }}.0) + elif {{ ref }} < {{ t.inclusive_value_range.min }}.0: + {{ fun }}({{ t.inclusive_value_range.min }}.0) + else: + {{ fun }}({{ ref }}) + else: + {{ fun }}({{ ref }}) + {% else %} + # Saturation not required due to compatible native representation of "{{ t }}" + {{ fun }}({{ ref }}) + {% endif %} +{% else %} + {{ fun }}({{ ref }}) +{% endif %} +{% endmacro %} + + +{% macro _serialize_fixed_length_array(t, ref, offset) %} + assert len({{ ref }}) == {{ t.capacity }}, '{{ ref }}: {{ t }}' +{# Saturation of bool[] or standard-bit-length primitive arrays is not needed because the range of native + # representations matches that of the final serialized value. Saturation is only needed in the case of elementwise + # serialization, which is implemented in the corresponding type serialization macros. #} +{% if t.element_type is BooleanType %} + _ser_.add_{{ offset|alignment_prefix }}_array_of_bits({{ ref }}) +{% elif t.element_type is PrimitiveType and t.element_type.standard_bit_length %} + _ser_.add_{{ offset|alignment_prefix -}}_array_of_standard_bit_length_primitives({{ ref }}) +{% else %} + {# Element offset is the superposition of each individual element offsets plus the array's own offset. + # For example, an array like uint8[3] offset by 16 bits would have its element_offset = {16, 24, 32}. + # We can also unroll element serialization for small arrays (e.g., below ~10 elements) to take advantage of + # spurious alignment of elements but the benefit of such optimization is believed to be negligible. 
#} + {% set element_offset = offset + t.element_type.bit_length_set.repeat_range(t.capacity - 1) %} + {% set element_ref = 'elem'|to_template_unique_name %} + # Element offset: {{ element_offset }} + for {{ element_ref }} in {{ ref }}: + {{ _serialize_any(t.element_type, element_ref, element_offset)|indent }} +{% endif %} +{% endmacro %} + + +{% macro _serialize_variable_length_array(t, ref, offset) %} + # Variable-length array: length field byte-aligned: {{ offset.is_aligned_at_byte() }}; {# -#} + all elements byte-aligned: {{ (offset + t.bit_length_set).is_aligned_at_byte() }}. + assert len({{ ref }}) <= {{ t.capacity }}, '{{ ref }}: {{ t }}' + {{ _serialize_integer(t.length_field_type, 'len(%s)'|format(ref), offset) }} +{# Saturation of bool[] or standard-bit-length primitive arrays is not needed because the range of native + # representations matches that of the final serialized value. Saturation is only needed in the case of elementwise + # serialization, which is implemented in the corresponding type serialization macros. 
#} +{% if t.element_type is BooleanType %} + _ser_.add_{{ (offset + t.length_field_type.bit_length)|alignment_prefix }}_array_of_bits({{ ref }}) +{% elif t.element_type is PrimitiveType and t.element_type.standard_bit_length %} + _ser_.add_{{ (offset + t.length_field_type.bit_length)|alignment_prefix -}} + _array_of_standard_bit_length_primitives({{ ref }}) +{% else %} + {% set element_ref = 'elem'|to_template_unique_name %} + for {{ element_ref }} in {{ ref }}: + {{ _serialize_any(t.element_type, element_ref, offset + t.bit_length_set)|indent }} +{% endif %} +{% endmacro %} + + +{% macro _serialize_any(t, ref, offset) %} + {% if t.alignment_requirement > 1 %} + _ser_.pad_to_alignment({{ t.alignment_requirement }}) + {% endif %} + {%- if t is VoidType -%} _ser_.skip_bits({{ t.bit_length }}) + {%- elif t is BooleanType -%} _ser_.add_unaligned_bit({{ ref }}) + {%- elif t is IntegerType -%} {{ _serialize_integer(t, ref, offset) }} + {%- elif t is FloatType -%} {{ _serialize_float(t, ref, offset) }} + {%- elif t is FixedLengthArrayType -%} {{ _serialize_fixed_length_array(t, ref, offset) }} + {%- elif t is VariableLengthArrayType -%} {{ _serialize_variable_length_array(t, ref, offset) }} + {%- elif t is CompositeType -%} + {% if t is DelimitedType %} + {% if not t.inner_type.bit_length_set.fixed_length %} + {# Instead of the outer extent, we use the inner extent, which equals the max bit length and is a + # tighter bound than the user-defined extent. + # This is safe because when serializing we always know the concrete type. + # This would be unsafe when deserializing, of course. + # See the Specification for details. 
#} + {% set nested_capacity_bits = t.inner_type.extent + t.delimiter_header_type.bit_length %} + {% assert nested_capacity_bits % 8 == 0 %} + {% set nested_capacity_bytes = nested_capacity_bits // 8 %} + # Delimited serialization of {{ t }}, extent {{ t.extent }}, max bit length {{ t.inner_type.extent }} + _nested_ = _ser_.fork_bytes({{ nested_capacity_bytes }}) # Also includes the length of the delimiter header. + _nested_.skip_bits({{ t.delimiter_header_type.bit_length }}) # Leave space for the delimiter header. + assert _nested_.current_bit_length == {{ t.delimiter_header_type.bit_length }} + {{ ref }}._serialize_(_nested_) + _nested_length_ = _nested_.current_bit_length - {{ t.delimiter_header_type.bit_length }} + del _nested_ + assert {{ t.inner_type.bit_length_set.min }} <= _nested_length_ <= {{ t.inner_type.bit_length_set.max }} + assert _nested_length_ % 8 == 0 + _ser_.add_aligned_u32(_nested_length_ // 8) # Jump back and serialize the delimiter header. + _ser_.skip_bits(_nested_length_) # Return to the current offset. + {% else %} + {# Optional optimization: if the nested object is fixed-length, no need to fork the serializer. #} + {% set length_bits = t.inner_type.bit_length_set.max %} + {% assert length_bits == t.inner_type.bit_length_set.min %} + {% assert length_bits % 8 == 0 %} + {% set length_bytes = length_bits // 8 %} + # Delimited serialization of {{ t }}, fixed bit length {{ length_bits }} ({{ length_bytes }} bytes) + _ser_.add_aligned_u32({{ length_bytes }}) # Delimiter header is constant in this case. 
+ _ser_base_offset_ = _ser_.current_bit_length + {{ ref }}._serialize_(_ser_) + assert _ser_.current_bit_length - _ser_base_offset_ == {{ length_bits }} + {% endif %} + {% else %} + {{ ref }}._serialize_(_ser_) + {% endif %} + {%- else %}{% assert False %} + {%- endif %} + {% if t is CompositeType %} + assert _ser_.current_bit_length % {{ t.alignment_requirement }} == 0, 'Nested object alignment error' + {% endif %} + {% if t is not CompositeType and t.alignment_requirement > 1 %} + _ser_.pad_to_alignment({{ t.alignment_requirement }}) + {% endif %} +{% endmacro %} diff --git a/tox.ini b/tox.ini index 6d267813..4686c7c3 100644 --- a/tox.ini +++ b/tox.ini @@ -1,10 +1,8 @@ # -# We test nunavut using python 3.6 - 3.11. -# # The standard version to develop against is 3.10. # [tox] -envlist = {py36,py37,py38,py39,py310,py311}-{test,nnvg,doctest,rstdoctest},lint,report,docs +envlist = {py37,py38,py39,py310,py311}-{test,nnvg,doctest,rstdoctest},lint,report,docs [base] @@ -42,7 +40,7 @@ log_level = DEBUG log_cli = true log_cli_level = WARNING addopts: --keep-generated -norecursedirs = submodules .* build* +norecursedirs = submodules .* build* verification .tox # The fill fixtures deprecation warning comes from Sybil, which we don't have any control over. Remove when updated. 
filterwarnings = error diff --git a/verification/nunavut_test_types/test0/if/B.1.0.dsdl b/verification/nunavut_test_types/test0/if/B.1.0.dsdl new file mode 100644 index 00000000..4892e415 --- /dev/null +++ b/verification/nunavut_test_types/test0/if/B.1.0.dsdl @@ -0,0 +1,5 @@ +@deprecated +@union +C.1.0[2] x +C.1.0[<=2] y +@sealed diff --git a/verification/nunavut_test_types/test0/if/C.1.0.dsdl b/verification/nunavut_test_types/test0/if/C.1.0.dsdl new file mode 100644 index 00000000..6bdbb833 --- /dev/null +++ b/verification/nunavut_test_types/test0/if/C.1.0.dsdl @@ -0,0 +1,5 @@ +@union +@deprecated +uint8 x +int8 y +@sealed diff --git a/verification/nunavut_test_types/test0/if/del.1.0.dsdl b/verification/nunavut_test_types/test0/if/del.1.0.dsdl new file mode 100644 index 00000000..1ba12db6 --- /dev/null +++ b/verification/nunavut_test_types/test0/if/del.1.0.dsdl @@ -0,0 +1,6 @@ +# The name of this type is a Python keyword. +@deprecated +void8 +B.1.0[2] else # This is a Python/C keyword. +B.1.0[<=2] raise # This is a Python keyword. +@sealed diff --git a/verification/nunavut_test_types/test0/regulated/RGB888_3840x2748.0.1.dsdl b/verification/nunavut_test_types/test0/regulated/RGB888_3840x2748.0.1.dsdl new file mode 100644 index 00000000..e1f80241 --- /dev/null +++ b/verification/nunavut_test_types/test0/regulated/RGB888_3840x2748.0.1.dsdl @@ -0,0 +1,17 @@ +# This large data type is needed to ensure there are no scalability issues in the generated code. +# For example, string representations should be constructible even for very large data types. + +@deprecated + +uint16 PIXELS_PER_ROW = 3840 +uint16 ROWS_PER_IMAGE = 2748 +uint32 PIXELS_PER_IMAGE = PIXELS_PER_ROW * ROWS_PER_IMAGE + +uavcan.time.SynchronizedTimestamp.1.0 timestamp # Image capture time +void8 + +@assert _offset_ == {64} +uint8[PIXELS_PER_IMAGE * 3] pixels # Row major, top-left pixel first, color ordering RGB +# TODO: replace "uint8" with "byte" after https://github.com/OpenCyphal/pydsdl/pull/97 is in. 
+
+@sealed
diff --git a/verification/python/README.md b/verification/python/README.md
new file mode 100644
index 00000000..bd099ab9
--- /dev/null
+++ b/verification/python/README.md
@@ -0,0 +1,15 @@
+# Python codegen verification package
+
+This directory contains a PyTest suite that verifies the Python generation backend of Nunavut.
+It is not part of the Nunavut test suite but rather a completely standalone component,
+much like the C and C++ verification suites are not part of Nunavut's own tests.
+The Python versions targeted by the Python codegen, and by this suite, may differ from those of Nunavut itself.
+
+This suite has to be isolated from the outer Nunavut because it may need to test various configurations in
+different environments, which would be hard to reconcile with the normal Nunavut testing strategy.
+
+To run the suite manually, simply invoke Nox as you normally would:
+
+```sh
+nox
+```
diff --git a/verification/python/generated_code_requirements.txt b/verification/python/generated_code_requirements.txt
new file mode 100644
index 00000000..07278413
--- /dev/null
+++ b/verification/python/generated_code_requirements.txt
@@ -0,0 +1,5 @@
+# This file lists the dependencies of generated Python code.
+# Observe that Nunavut itself is not required for use of the generated code.
+# This file is relied on by the verification suite to ensure there are no implicit undocumented dependencies.
+
+numpy ~= 1.24
diff --git a/verification/python/noxfile.py b/verification/python/noxfile.py
new file mode 100644
index 00000000..01b9258e
--- /dev/null
+++ b/verification/python/noxfile.py
@@ -0,0 +1,124 @@
+# Copyright (c) 2023 OpenCyphal
+# This software is distributed under the terms of the MIT License.
+# Author: Pavel Kirienko +# type: ignore + +import os +import shutil +from pathlib import Path +import nox + + +PYTHONS = ["3.8", "3.9", "3.10", "3.11"] + +nox.options.error_on_external_run = True + +# Please keep these updated if the project directory is changed. +SUITE_DIR = Path(__file__).resolve().parent +SUITE_SRC_DIR = SUITE_DIR / "suite" +VERIFICATION_DIR = SUITE_DIR.parent +NUNAVUT_DIR = VERIFICATION_DIR.parent + +PUBLIC_REGULATED_DATA_TYPES_DIR = NUNAVUT_DIR / "submodules" / "public_regulated_data_types" +TEST_TYPES_DIR = VERIFICATION_DIR / "nunavut_test_types" + + +@nox.session(python=False) +def clean(session): + for w in [ + "*.egg-info", + "nunavut_out", + ".coverage*", + "html*", + ".*cache", + ".*compiled", + "*.log", + "*.tmp", + ".nox", + ]: + for f in Path.cwd().glob(w): + session.log(f"Removing: {f}") + if f.is_dir(): + shutil.rmtree(f, ignore_errors=True) + else: + f.unlink(missing_ok=True) + + +@nox.session(python=PYTHONS) +def test(session): + session.install("-e", str(NUNAVUT_DIR)) + session.install("-e", ".") + session.install("-r", "generated_code_requirements.txt") + session.install( + "pytest ~= 7.3", + "coverage ~= 7.2", + "mypy ~= 1.2", + "pylint ~= 2.17", + ) + + # The tmp dir will contain the DSDL-generated packages. We do not want to contaminate the source tree. + # Invoke Nunavut manually prior to running the tests because the generated packages must already be available. + # Invoking Nunavut from within PyTest is not possible because it will not be able to find the generated packages. 
+ root_namespace_dirs = [ + PUBLIC_REGULATED_DATA_TYPES_DIR / "uavcan", + TEST_TYPES_DIR / "test0" / "regulated", + TEST_TYPES_DIR / "test0" / "if", + ] + generated_dir = Path(session.create_tmp()).resolve() + for nsd in root_namespace_dirs: + session.run( + "nnvg", + str(Path(nsd).resolve()), + "--target-language=py", + "--outdir", + str(generated_dir), + env={ + "DSDL_INCLUDE_PATH": os.pathsep.join(map(str, root_namespace_dirs)), + }, + ) + session.log(f"Compilation finished") + + # Run PyTest against the verification suite and the generated code at the same time. + # If there are any doctests or unit tests within the generated code, they will be executed as well. + test_paths = [ + SUITE_SRC_DIR, + generated_dir, + ] + session.run( + "coverage", + "run", + "-m", + "pytest", + *map(str, test_paths), + env={ + "NUNAVUT_VERIFICATION_DSDL_PATH": os.pathsep.join(map(str, root_namespace_dirs)), + "PYTHONPATH": str(generated_dir), + }, + ) + session.run("coverage", "report", "--fail-under=95") + if session.interactive: + session.run("coverage", "html") + report_file = Path.cwd().resolve() / "htmlcov" / "index.html" + session.log(f"OPEN IN WEB BROWSER: file://{report_file}") + + # The static analysis is run in the same session because it relies on the generated code. + session.run( + "mypy", + "--strict", + f"--config-file={NUNAVUT_DIR / 'tox.ini'}", # Inherit the settings from the outer project. Not sure about it. 
+ str(SUITE_SRC_DIR), + *[str(x) for x in generated_dir.iterdir() if x.is_dir() and x.name[0] not in "._"], + ) + session.run( + "pylint", + str(SUITE_SRC_DIR), + env={ + "PYTHONPATH": str(generated_dir), + }, + ) + + +@nox.session(reuse_venv=True) +def black(session): + session.install("black ~= 23.3") + session.run("black", "--check", "suite", "noxfile.py") diff --git a/verification/python/pyproject.toml b/verification/python/pyproject.toml new file mode 100644 index 00000000..f2322484 --- /dev/null +++ b/verification/python/pyproject.toml @@ -0,0 +1,81 @@ +# Settings specific to the Python codegen verification suite only. Not related to Nunavut itself. + +# -------------------------------------------------- PYTEST -------------------------------------------------- +[tool.pytest.ini_options] +python_files = "*.py" +log_level = "DEBUG" +log_cli_level = "WARNING" +log_cli = true +log_file = "pytest.log" +addopts = "--doctest-modules -v" +filterwarnings = [ + "ignore::DeprecationWarning", # Some of the DSDL-generated types are deprecated on purpose. +] + +# -------------------------------------------------- COVERAGE -------------------------------------------------- +[tool.coverage.run] +source = [ + "suite", +] +branch = true + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "raise AssertionError", + "raise NotImplementedError", + "assert False", +] + +# -------------------------------------------------- PYLINT -------------------------------------------------- +[tool.pylint.MASTER] +fail-under = 9.9 + +[tool.pylint.'MESSAGES CONTROL'] +confidence = "UNDEFINED" # Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 
+disable = [ + "cyclic-import", + "useless-import-alias", + "import-outside-toplevel", + "fixme", + "inconsistent-return-statements", + "unbalanced-tuple-unpacking", + "no-name-in-module", + "superfluous-parens", + "unsubscriptable-object", + "too-few-public-methods", + "import-error", + "no-self-use", + "multiple-statements", + "arguments-differ", + "too-many-statements", + "useless-super-delegation", + "too-many-instance-attributes", + "too-many-public-methods", + "consider-using-f-string", + "unspecified-encoding", + "use-implicit-booleaness-not-comparison", + "too-many-return-statements", # Already covered by the branch limit. +] + +[tool.pylint.REPORTS] +output-format = "colorized" + +[tool.pylint.DESIGN] +max-branches = 20 +max-locals = 20 + +[tool.pylint.FORMAT] +max-line-length = 120 + +[tool.pylint.BASIC] +bad-names = [] +variable-rgx = "[a-z_][a-z0-9_]*" + +# -------------------------------------------------- BLACK -------------------------------------------------- +[tool.black] +line-length = 120 +target-version = ["py311"] +include = ''' +(.*\.pyi?$) +''' diff --git a/verification/python/suite/__init__.py b/verification/python/suite/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/verification/python/suite/conftest.py b/verification/python/suite/conftest.py new file mode 100644 index 00000000..e5db1253 --- /dev/null +++ b/verification/python/suite/conftest.py @@ -0,0 +1,49 @@ +# Copyright (c) 2019 OpenCyphal +# This software is distributed under the terms of the MIT License. +# Author: Pavel Kirienko + +from __future__ import annotations +import os +from typing import Sequence, Any +import dataclasses +import logging +import pytest +import pydsdl + + +_logger = logging.getLogger(__name__) + + +@dataclasses.dataclass(frozen=True) +class GeneratedPackageInfo: + models: Sequence[pydsdl.CompositeType] + """ + List of PyDSDL objects describing the source DSDL definitions. 
+ This can be used for arbitrarily complex introspection and reflection. + """ + + +@pytest.fixture(scope="session") +def compiled() -> list[GeneratedPackageInfo]: + """ + Obtains the list of DSDL-generated Python packages with metadata. + This is used to guide the automatic testing process. + """ + out: list[GeneratedPackageInfo] = [] + ns_dirs = os.environ["NUNAVUT_VERIFICATION_DSDL_PATH"].split(os.pathsep) + for nsd in ns_dirs: + _logger.info("Reading DSDL namespace %s", nsd) + composite_types = pydsdl.read_namespace(root_namespace_directory=nsd, lookup_directories=ns_dirs) + if not composite_types: # pragma: no cover + _logger.warning("Empty DSDL namespace: %s", nsd) + continue + out.append(GeneratedPackageInfo(models=composite_types)) + return out + + +def pytest_configure(config: Any) -> None: + """ + See https://docs.pytest.org/en/6.2.x/reference.html#initialization-hooks + """ + del config + logging.getLogger("pydsdl").setLevel(logging.INFO) diff --git a/verification/python/suite/test_builtin_form.py b/verification/python/suite/test_builtin_form.py new file mode 100644 index 00000000..ffe90cdc --- /dev/null +++ b/verification/python/suite/test_builtin_form.py @@ -0,0 +1,134 @@ +# Copyright (c) 2019 OpenCyphal +# This software is distributed under the terms of the MIT License. 
+# Author: Pavel Kirienko + +from __future__ import annotations +import logging +import pytest +import pydsdl +from .util import expand_service_types, make_random_object +from .conftest import GeneratedPackageInfo + + +_logger = logging.getLogger(__name__) + + +def test_builtin_form_manual(compiled: list[GeneratedPackageInfo]) -> None: + from nunavut_support import to_builtin, update_from_builtin + import uavcan.node + import uavcan.register + import uavcan.primitive.array + import uavcan.time + + bi = to_builtin( + uavcan.node.Heartbeat_1_0( + uptime=123456, + health=uavcan.node.Health_1_0(2), + mode=uavcan.node.Mode_1_0(3), + vendor_specific_status_code=0xBA, + ) + ) + assert bi == { + "uptime": 123456, + "health": {"value": 2}, + "mode": {"value": 3}, + "vendor_specific_status_code": 186, + } + + bi = to_builtin( + uavcan.node.GetInfo_1_0.Response( + protocol_version=uavcan.node.Version_1_0(1, 2), + hardware_version=uavcan.node.Version_1_0(3, 4), + software_version=uavcan.node.Version_1_0(5, 6), + software_vcs_revision_id=0xBADC0FFEE0DDF00D, + unique_id=b"0123456789abcdef", + name="org.node.my", + software_image_crc=[0x0DDDEADB16B00B5], + certificate_of_authenticity=list(range(100)), + ) + ) + print(bi) + assert bi == { + "protocol_version": {"major": 1, "minor": 2}, + "hardware_version": {"major": 3, "minor": 4}, + "software_version": {"major": 5, "minor": 6}, + "software_vcs_revision_id": 0xBADC0FFEE0DDF00D, + "unique_id": list(b"0123456789abcdef"), + "name": "org.node.my", + "software_image_crc": [0x0DDDEADB16B00B5], + # The following will have to be changed when strings are supported natively in DSDL: + "certificate_of_authenticity": list(range(100)), + } + + bi = to_builtin( + uavcan.register.Access_1_0.Response( + timestamp=uavcan.time.SynchronizedTimestamp_1_0(1234567890), + mutable=True, + persistent=False, + value=uavcan.register.Value_1_0( + real32=uavcan.primitive.array.Real32_1_0( + [ + 123.456, + -789.123, + float("+inf"), + ] + ) + ), + ) + ) + 
print(bi) + assert bi == { + "timestamp": {"microsecond": 1234567890}, + "mutable": True, + "persistent": False, + "value": { + "real32": { + "value": [ + pytest.approx(123.456), + pytest.approx(-789.123), + pytest.approx(float("+inf")), + ], + }, + }, + } + + with pytest.raises(ValueError, match=".*field.*"): + bi["nonexistent_field"] = 123 + update_from_builtin(uavcan.register.Access_1_0.Response(), bi) + + +def test_builtin_form_automatic(compiled: list[GeneratedPackageInfo]) -> None: + from nunavut_support import to_builtin, update_from_builtin, get_class + + for info in compiled: + for model in expand_service_types(info.models): + if model.bit_length_set.max / 8 > 1024 * 1024: + _logger.info("Automatic test of %s skipped because the type is too large", model) + continue # Skip large objects because they take forever to convert and test + + obj = make_random_object(model) + bi = to_builtin(obj) + reconstructed = update_from_builtin(get_class(model)(), bi) + + if str(obj) != str(reconstructed) or repr(obj) != repr(reconstructed): # pragma: no branch + if pydsdl.FloatType.__name__ not in repr(model): # pragma: no cover + _logger.info( + "Automatic comparison cannot be performed because the objects of type %s may " + "contain floats. 
Please implement proper DSDL object comparison methods and " + "update this test to use them.", + model, + ) + _logger.info("Original random object: %r", obj) + _logger.info("Reconstructed object: %r", reconstructed) + _logger.info("Built-in representation: %r", bi) + else: + assert False, f"{obj} != {reconstructed}" + + +def test_issue_147(compiled: list[GeneratedPackageInfo]) -> None: + from nunavut_support import update_from_builtin + from uavcan.register import Access_1_0 + + # Automatic promotion https://github.com/OpenCyphal/pycyphal/issues/147 + valid = update_from_builtin(Access_1_0.Request(), "uavcan.pub.measurement") + assert valid.name.name.tobytes().decode() == "uavcan.pub.measurement" diff --git a/verification/python/suite/test_constants.py b/verification/python/suite/test_constants.py new file mode 100644 index 00000000..397b0b43 --- /dev/null +++ b/verification/python/suite/test_constants.py @@ -0,0 +1,28 @@ +# Copyright (c) 2019 OpenCyphal +# This software is distributed under the terms of the MIT License. 
# Author: Pavel Kirienko

from __future__ import annotations
import pydsdl
import pytest
from .util import expand_service_types
from .conftest import GeneratedPackageInfo


def test_constants(compiled: list[GeneratedPackageInfo]) -> None:
    """
    Checks that every primitive-typed DSDL constant of every generated type equals its value
    in the DSDL source, and that the fixed port-ID (when defined) is reported consistently
    by the generated class and by its PyDSDL model.
    """
    from nunavut_support import get_class, get_attribute, get_fixed_port_id, get_model

    for info in compiled:
        for model in expand_service_types(info.models, keep_services=True):
            generated_type = get_class(model)
            for constant in model.constants:
                if not isinstance(constant.data_type, pydsdl.PrimitiveType):  # pragma: no branch
                    continue
                dsdl_value = constant.value
                assert isinstance(dsdl_value, pydsdl.Primitive)
                assert dsdl_value.native_value == pytest.approx(
                    get_attribute(generated_type, constant.name)
                ), "The generated constant does not compare equal against the DSDL source"
            # Both introspection paths must agree on the fixed port-ID whenever both report one.
            fpid_from_class = get_fixed_port_id(generated_type)
            fpid_from_model = get_model(generated_type).fixed_port_id
            assert (fpid_from_class is None) or (fpid_from_model is None) or (fpid_from_class == fpid_from_model)
# Author: Pavel Kirienko

"""
Manual (hand-written, bit-exact) serialization/deserialization tests for the generated Python packages.
The expected wire representations are spelled out explicitly as bit strings or byte sequences.
"""

from __future__ import annotations
import logging
import numpy
import pytest
from .conftest import GeneratedPackageInfo


_logger = logging.getLogger(__name__)


def test_manual_assignment(compiled: list[GeneratedPackageInfo]) -> None:
    """Byte-like variable-length fields must accept a memoryview and expose the same bytes back."""
    from uavcan.primitive import Unstructured_1_0 as Un, String_1_0 as St

    del compiled

    ob1 = Un(memoryview(b"Hello world"))
    assert ob1.value.tobytes().decode() == "Hello world"

    ob2 = St(memoryview(b"Hello world"))
    assert ob2.value.tobytes().decode() == "Hello world"


def test_manual_del(compiled: list[GeneratedPackageInfo]) -> None:
    """Bit-exact deserialization of the `if_.del` type (union/array combinations and zero extension)."""
    from nunavut_support import deserialize, get_attribute, set_attribute
    import if_

    del compiled

    # Implicit zero extension
    ize = deserialize(if_.del_1_0, [memoryview(b"")])
    assert ize is not None
    assert repr(ize) == repr(if_.del_1_0())

    # Each string below is one byte of the serialized representation, MSB first.
    obj = deserialize(
        if_.del_1_0,
        _compile_serialized_representation(
            # void8
            "00000000"
            # B union, second field C.1.0[<=2] y
            "00000001"
            "00000010"  # length 2 elements
            # First element C.1.0
            "00000001"  # second field selected uint1 y
            "00000111"  # y = 7
            # Second element C.1.0
            "00000000"  # first field selected uint1 x
            "00000101"  # x = 5
            # B union, first field C.1.0[2] x
            "00000000"
            # First element C.1.0
            "00000000"  # first field selected uint1 x
            "00001000"  # x = 8
            # Second element C.1.0
            "00000001"  # second field selected uint1 y
            "00001101"  # y = 13
            # empty B.1.0[<=2] y
            "00000000"
        ),
    )
    assert obj is not None
    # In a union, the non-selected field must read as None and the selected one must carry the value.
    assert obj.else_[0].x is None
    assert obj.else_[0].y is not None
    assert len(obj.else_[0].y) == 2
    assert obj.else_[0].y[0].x is None
    assert obj.else_[0].y[0].y == 7
    assert obj.else_[0].y[1].x == 5
    assert obj.else_[0].y[1].y is None
    assert obj.else_[1].x is not None
    assert obj.else_[1].y is None
    assert obj.else_[1].x[0].x == 8
    assert obj.else_[1].x[0].y is None
    assert obj.else_[1].x[1].x is None
    assert obj.else_[1].x[1].y == 13
    assert len(obj.raise_) == 0

    with pytest.raises(AttributeError, match="nonexistent"):
        get_attribute(obj, "nonexistent")

    with pytest.raises(AttributeError, match="nonexistent"):
        set_attribute(obj, "nonexistent", 123)


def test_manual_heartbeat(compiled: list[GeneratedPackageInfo]) -> None:
    """Bit-exact deserialization of uavcan.node.Heartbeat.1.0 from a hand-built representation."""
    from nunavut_support import deserialize, get_attribute, set_attribute
    import uavcan.node

    del compiled

    # Implicit zero extension
    ize = deserialize(uavcan.node.Heartbeat_1_0, [memoryview(b"")])
    assert ize is not None
    assert repr(ize) == repr(uavcan.node.Heartbeat_1_0())
    assert ize.uptime == 0
    assert ize.vendor_specific_status_code == 0

    obj = deserialize(
        uavcan.node.Heartbeat_1_0,
        _compile_serialized_representation(
            _bin(0xEFBE_ADDE, 32),  # uptime dead beef in little-endian byte order
            "00000010",  # health caution
            "00000001",  # mode initialization
            "10101111",  # vendor-specific
        ),
    )
    assert obj is not None
    assert obj.uptime == 0xDEADBEEF
    assert obj.health.value == uavcan.node.Health_1_0.CAUTION
    assert obj.mode.value == uavcan.node.Mode_1_0.INITIALIZATION
    assert obj.vendor_specific_status_code == 0b10101111

    with pytest.raises(AttributeError, match="nonexistent"):
        get_attribute(obj, "nonexistent")

    with pytest.raises(AttributeError, match="nonexistent"):
        set_attribute(obj, "nonexistent", 123)


def test_minor_alias(compiled: list[GeneratedPackageInfo]) -> None:
    """The un-versioned alias must point at the newest minor version, not an older one."""
    from regulated.delimited import BDelimited_1, BDelimited_1_1, BDelimited_1_0

    del compiled
    assert BDelimited_1 is not BDelimited_1_0  # type: ignore
    assert BDelimited_1 is BDelimited_1_1


def test_delimited(compiled: list[GeneratedPackageInfo]) -> None:
    """
    Delimited (extensible) serialization: checks delimiter headers byte-for-byte and verifies
    cross-minor-version compatibility via implicit zero extension/truncation in both directions.
    """
    from nunavut_support import serialize, deserialize
    from regulated.delimited import A_1_0, A_1_1, BDelimited_1_0, BDelimited_1_1
    from regulated.delimited import CFixed_1_0, CFixed_1_1, CVariable_1_0, CVariable_1_1

    del compiled

    # Little-endian integer encoders used to spell out the reference representation below.
    def u8(x: int) -> bytes:
        return int(x).to_bytes(1, "little")

    def u32(x: int) -> bytes:
        return int(x).to_bytes(4, "little")

    # Serialize first and check against the reference.
    o = A_1_0(
        del_=BDelimited_1_0(
            var=[CVariable_1_0([1, 2]), CVariable_1_0([3], 4)],
            fix=[CFixed_1_0([5, 6])],
        ),
    )
    print("object below:\n", o)
    sr = b"".join(serialize(o))
    del o
    # fmt: off
    ref = (
        u8(1)           # | Union tag of del
        + u32(23)       # | Delimiter header of BDelimited.1.0 del
        + u8(2)         # | Array var contains two elements
        + u32(4)        # | Delimiter header of the first array element
        + u8(2)         # | Array a contains 2 elements
        + u8(1) + u8(2) # | This is the array a
        + u8(0)         # | Field b left uninitialized
        + u32(3)        # | Delimiter header of the second array element
        + u8(1)         # | Array a contains 1 element
        + u8(3)         # | This is the array a
        + u8(4)         # | Field b
        + u8(1)         # | Array fix contains one element
        + u32(2)        # | Delimiter header of the only array element
        + u8(5) + u8(6) # | This is the array a
    )
    # fmt: on
    print(" ".join(f"{b:02x}" for b in sr))
    assert sr == ref

    # Deserialize using a DIFFERENT MINOR VERSION which requires the implicit zero extension/truncation rules to work.
    q = deserialize(A_1_1, [memoryview(sr)])
    assert q
    assert q.del_ is not None
    assert len(q.del_.var) == 2
    assert len(q.del_.fix) == 1
    assert list(q.del_.var[0].a) == [1, 2]
    assert list(q.del_.var[1].a) == [3]  # b is implicitly truncated
    assert list(q.del_.fix[0].a) == [5, 6, 0]  # 3rd is implicitly zero-extended
    assert q.del_.fix[0].b == 0  # b is implicitly zero-extended

    # Reverse version switch.
    q = A_1_1(
        del_=BDelimited_1_1(
            var=[CVariable_1_1([11, 22])],
            fix=[CFixed_1_1([5, 6, 7], 8), CFixed_1_1([100, 200, 123], 99)],
        ),
    )
    sr = b"".join(serialize(q))
    del q
    print(" ".join(f"{b:02x}" for b in sr))
    p = deserialize(A_1_0, [memoryview(sr)])
    assert p
    assert p.del_ is not None
    assert len(p.del_.var) == 1
    assert len(p.del_.fix) == 2
    assert list(p.del_.var[0].a) == [11, 22]
    assert p.del_.var[0].b == 0  # b is implicitly zero-extended
    assert list(p.del_.fix[0].a) == [5, 6]  # 3rd is implicitly truncated, b is implicitly truncated
    assert list(p.del_.fix[1].a) == [100, 200]  # 3rd is implicitly truncated, b is implicitly truncated

    # Delimiter header too large.
    assert None is deserialize(A_1_1, [memoryview(b"\x01" + b"\xFF" * 4)])


def _compile_serialized_representation(*binary_chunks: str) -> list[memoryview]:
    """
    Concatenates the given bit strings (each character is one bit, MSB first), pads the result
    with zero bits up to the next byte boundary, and returns it as a single-fragment list of
    memoryviews suitable for deserialize().
    """
    s = "".join(binary_chunks)
    s = s.ljust(len(s) + 8 - len(s) % 8, "0")
    assert len(s) % 8 == 0
    byte_sized_chunks = [s[i : i + 8] for i in range(0, len(s), 8)]
    byte_list = list(map(lambda x: int(x, 2), byte_sized_chunks))
    out = numpy.array(byte_list, dtype=numpy.uint8)
    _logger.debug("Constructed serialized representation: %r --> %s", binary_chunks, out)
    return [out.data]


def _bin(value: int, width: int) -> str:
    """Renders the value as a bit string zero-padded to exactly `width` bits; asserts it fits."""
    out = bin(value)[2:].zfill(width)
    assert len(out) == width, f"Value is too wide: {bin(value)} is more than {width} bits wide"
    return out
+# Author: Pavel Kirienko + +from __future__ import annotations +import os +import gc +from typing import Any +import time +import random +import logging +import dataclasses +import numpy +import pytest +import pydsdl +from .util import expand_service_types, make_random_object, are_close +from .conftest import GeneratedPackageInfo + + +_MAX_ALLOWED_SERIALIZATION_DESERIALIZATION_TIME = 90e-3 +""" +Fail the test if any type takes longer than this to serialize or deserialize on average. +This may appear huge but it's necessary to avoid false positives in the CI environment. +""" + +_MAX_RANDOM_SERIALIZED_REPRESENTATION_FRAGMENTS = 1000 +""" +When generating random serialized representations, limit the number of fragments to this value +for performance reasons. Also, a large number of fragments may occasionally cause the test to run out of memory +and be killed, especially so in cloud-hosted CI systems which are always memory-impaired. +""" + +_NUM_RANDOM_SAMPLES = int(os.environ.get("NUNAVUT_PYTHON_TEST_NUM_RANDOM_SAMPLES", 5)) +""" +Set this environment variable to a higher value for a deeper exploration. +""" + +_MAX_EXTENT_BYTES = 99 * 1024**2 +""" +Do not test data types whose extent exceeds this limit. +""" + +_logger = logging.getLogger(__name__) + + +@dataclasses.dataclass(frozen=True) +class _TypeTestStatistics: + mean_serialization_time: float + mean_deserialization_time: float + random_serialized_representation_correctness_ratio: float + + @property + def worst_time(self) -> float: + return max(self.mean_serialization_time, self.mean_deserialization_time) + + +def test_random(compiled: list[GeneratedPackageInfo], caplog: Any) -> None: + from nunavut_support import get_class, serialize, deserialize + + _logger.info( + "Number of random samples: %s. Set the environment variable PYCYPHAL_TEST_NUM_RANDOM_SAMPLES to override.", + _NUM_RANDOM_SAMPLES, + ) + + # The random test intentionally generates a lot of faulty data, which generates a lot of log messages. 
+ # We don't want them to clutter the test output, so we raise the logging level temporarily. + caplog.set_level(logging.WARNING, logger="nunavut_support") + + performance: dict[pydsdl.CompositeType, _TypeTestStatistics] = {} + + for info in compiled: + for model in expand_service_types(info.models, keep_services=True): + if not isinstance(model, pydsdl.ServiceType): + if model.extent > 8 * _MAX_EXTENT_BYTES: + _logger.info("Skipping %s due to excessive size", model) + else: + performance[model] = _test_type(model, _NUM_RANDOM_SAMPLES) + else: + dtype = get_class(model) + with pytest.raises(TypeError): + assert list(serialize(dtype())) + with pytest.raises(TypeError): + deserialize(dtype, [memoryview(b"")]) + + _logger.info("Tested types ordered by serialization speed, %d random samples per type", _NUM_RANDOM_SAMPLES) + _logger.info( + "Columns: random SR correctness ratio; " "mean serialization time [us]; mean deserialization time [us]" + ) + + for ty, stat in sorted(performance.items(), key=lambda kv: -kv[1].worst_time): # pragma: no branch + assert isinstance(stat, _TypeTestStatistics) + _logger.info( + "%-60s %3.0f%% %6.0f %6.0f%s", + ty, + stat.random_serialized_representation_correctness_ratio * 100, + stat.mean_serialization_time * 1e6, + stat.mean_deserialization_time * 1e6, + ("" if stat.worst_time < 1e-3 else "\tSLOW!"), + ) + assert ( + stat.worst_time <= _MAX_ALLOWED_SERIALIZATION_DESERIALIZATION_TIME + ), f"Serialization performance issues detected in type {ty}" + + +def _test_type(model: pydsdl.CompositeType, num_random_samples: int) -> _TypeTestStatistics: + from nunavut_support import get_class, get_model, deserialize + + _logger.debug("Roundtrip serialization test of %s with %d random samples", model, num_random_samples) + dtype = get_class(model) + samples: list[tuple[float, float]] = [_serialize_deserialize(dtype())] + rand_sr_validness: list[bool] = [] + + def once(obj: object) -> tuple[float, float]: + s = _serialize_deserialize(obj) + 
samples.append(s) + return s + + for index in range(num_random_samples): + ts = time.process_time() + # Forward test: get random object, serialize, deserialize, compare + sample_ser = once(make_random_object(model)) + + # Reverse test: get random serialized representation, deserialize; if successful, serialize again and compare + sr = _make_random_fragmented_serialized_representation(get_model(dtype).bit_length_set) + ob = deserialize(dtype, sr) + rand_sr_validness.append(ob is not None) + sample_des: tuple[float, float] | None = None + if ob: + sample_des = once(ob) + + elapsed = time.process_time() - ts + if elapsed > 1.0: + duration_ser = f"{sample_ser[0] * 1e6:.0f}/{sample_ser[1] * 1e6:.0f}" + duration_des = f"{sample_des[0] * 1e6:.0f}/{sample_des[1] * 1e6:.0f}" if sample_des else "N/A" + _logger.debug( # pylint: disable=logging-fstring-interpolation + f"Random sample {index + 1} of {num_random_samples} took {elapsed:.1f} s; " + f"random SR correct: {ob is not None}; " + f"duration forward/reverse [us]: ({duration_ser})/({duration_des})" + ) + + out = numpy.mean(samples, axis=0) + assert out.shape == (2,) + return _TypeTestStatistics( + mean_serialization_time=out[0], + mean_deserialization_time=out[1], + random_serialized_representation_correctness_ratio=float(numpy.mean(rand_sr_validness)), + ) + + +def _serialize_deserialize(obj: object) -> tuple[float, float]: + from nunavut_support import get_model, serialize, deserialize + + gc.collect() + gc.disable() # Must be disabled, otherwise it induces spurious false-positive performance warnings + + ts = time.process_time() + chunks = list(serialize(obj)) # GC must be disabled while we're in the timed context + ser_sample = time.process_time() - ts + + ts = time.process_time() + d = deserialize(type(obj), chunks) # GC must be disabled while we're in the timed context + des_sample = time.process_time() - ts + + gc.enable() + + assert d is not None + assert type(obj) is type(d) + assert get_model(obj) == 
get_model(d) + + if not are_close(get_model(obj), obj, d): # pragma: no cover + assert False, f"{obj} != {d}; sr: {bytes().join(chunks).hex()}" # Branched for performance reasons + + # Similar floats may produce drastically different string representations, so if there is at least one float inside, + # we skip the string representation equality check. + if pydsdl.FloatType.__name__ not in repr(get_model(d)): + assert str(obj) == str(d) + assert repr(obj) == repr(d) + + return ser_sample, des_sample + + +def _make_random_fragmented_serialized_representation(bls: pydsdl.BitLengthSet) -> list[memoryview]: + if bls.max < 8 * 1024: # If the BLS appears small, perform numerical expansion and pick a random value. + bit_length = random.choice(list(bls)) + byte_length = (bit_length + 7) // 8 + else: # Otherwise, just use the smallest value because expansion is slow. + bit_length = bls.min + byte_length = (bit_length + 7) // 8 + return _fragment_randomly(numpy.random.randint(0, 256, size=byte_length, dtype=numpy.uint8).data) + + +def _fragment_randomly(data: memoryview) -> list[memoryview]: + try: + n = random.randint(1, min(_MAX_RANDOM_SERIALIZED_REPRESENTATION_FRAGMENTS, len(data))) + except ValueError: + return [data] # Nothing to fragment + else: + q, r = divmod(len(data), n) + idx = [q * i + min(i, r) for i in range(n + 1)] + return [data[idx[i] : idx[i + 1]] for i in range(n)] + + +def test_fragment_randomly() -> None: + assert _fragment_randomly(memoryview(b"")) == [memoryview(b"")] + assert _fragment_randomly(memoryview(b"a")) == [memoryview(b"a")] + for _ in range(100): + size = random.randint(0, 100) + data = numpy.random.randint(0, 256, size=size, dtype=numpy.uint8).data + fragments = _fragment_randomly(data) + assert b"".join(fragments) == data diff --git a/verification/python/suite/test_textual.py b/verification/python/suite/test_textual.py new file mode 100644 index 00000000..528c3360 --- /dev/null +++ b/verification/python/suite/test_textual.py @@ -0,0 +1,47 
@@ +# Copyright (c) 2019 OpenCyphal +# This software is distributed under the terms of the MIT License. +# Author: Pavel Kirienko + +from __future__ import annotations +import logging +import pydsdl +from .util import expand_service_types, make_random_object +from .conftest import GeneratedPackageInfo + + +_MAX_EXTENT_BYTES = 99 * 1024**2 +""" +Do not test data types whose extent exceeds this limit. +""" + +_logger = logging.getLogger(__name__) + + +def test_textual(compiled: list[GeneratedPackageInfo]) -> None: + from nunavut_support import get_attribute + + def validate(obj: object, s: str) -> None: + for f in model.fields_except_padding: # pylint: disable=undefined-loop-variable + field_present = (f"{f.name}=" in s) or (f"{f.name}_=" in s) + if isinstance(model.inner_type, pydsdl.UnionType): # pylint: disable=undefined-loop-variable + # In unions only the active field is printed. + # The active field may contain nested fields which may be named similarly to other fields + # in the current union, so we can't easily ensure lack of non-active fields in the output. + field_active = get_attribute(obj, f.name) is not None + if field_active: + assert field_present, f"{f.name}: {s}" + else: + # In structures all fields are printed always. + assert field_present, f"{f.name}: {s}" + + for info in compiled: + for model in expand_service_types(info.models): + if model.extent > 8 * _MAX_EXTENT_BYTES: + _logger.info("Skipping %s due to excessive size", model) + continue + _logger.debug("Testing textual representation of %s...", model) + for fn in [str, repr]: + assert callable(fn) + for _ in range(10): + ob = make_random_object(model) + validate(ob, fn(ob)) diff --git a/verification/python/suite/util.py b/verification/python/suite/util.py new file mode 100644 index 00000000..22bc85a0 --- /dev/null +++ b/verification/python/suite/util.py @@ -0,0 +1,143 @@ +# Copyright (c) 2019 OpenCyphal +# This software is distributed under the terms of the MIT License. 
+# Author: Pavel Kirienko + +from __future__ import annotations +import random +from typing import Iterable, Iterator, Any +import struct +import itertools +import functools +import numpy +import pydsdl + + +def expand_service_types( + models: Iterable[pydsdl.CompositeType], keep_services: bool = False +) -> Iterator[pydsdl.CompositeType]: + """ + Iterates all types in the provided list, expanding each ServiceType into a pair of CompositeType: one for + request, one for response. + """ + for m in models: + if isinstance(m, pydsdl.ServiceType): + yield m.request_type + yield m.response_type + if keep_services: + yield m + else: + yield m + + +def make_random_object(model: pydsdl.SerializableType) -> Any: + """ + Returns an object of the specified DSDL type populated with random data. + """ + from nunavut_support import get_class, set_attribute + + def fifty_fifty() -> bool: + return random.random() >= 0.5 + + if isinstance(model, pydsdl.BooleanType): + return fifty_fifty() + + if isinstance(model, pydsdl.IntegerType): + return random.randint(int(model.inclusive_value_range.min), int(model.inclusive_value_range.max)) + + if isinstance(model, pydsdl.FloatType): # We want inf/nan as well, so we generate int and then reinterpret + int_value = random.randrange(0, 2**model.bit_length) + unpack_fmt, pack_fmt = { + 16: ("e", "H"), + 32: ("f", "I"), + 64: ("d", "Q"), + }[model.bit_length] + fmt_prefix = "<" + (out,) = struct.unpack(fmt_prefix + unpack_fmt, struct.pack(fmt_prefix + pack_fmt, int_value)) + return out + + if isinstance(model, pydsdl.FixedLengthArrayType): + et = model.element_type + if isinstance(et, pydsdl.UnsignedIntegerType) and et.bit_length == 8: # Special case for faster testing + out = numpy.random.randint(0, 256, size=model.capacity, dtype=numpy.uint8) + else: + out = [make_random_object(model.element_type) for _ in range(model.capacity)] + if model.capacity < 10000: + if isinstance(et, pydsdl.UnsignedIntegerType) and et.bit_length <= 8 and 
fifty_fifty(): + out = bytes(out) + return out + + if isinstance(model, pydsdl.VariableLengthArrayType): + length = random.randint(0, model.capacity) + et = model.element_type + if isinstance(et, pydsdl.UnsignedIntegerType) and et.bit_length == 8: # Special case for faster testing + out = numpy.random.randint(0, 256, size=length, dtype=numpy.uint8) + else: + out = [make_random_object(model.element_type) for _ in range(length)] + if length < 10000: # pragma: no branch + if isinstance(et, pydsdl.UnsignedIntegerType) and et.bit_length <= 8 and fifty_fifty(): + out = bytes(out) + if model.string_like and fifty_fifty(): + try: + out = bytes(out).decode() + except ValueError: + pass + return out + + if isinstance(model, pydsdl.StructureType): + o = get_class(model)() + for f in model.fields_except_padding: + v = make_random_object(f.data_type) + set_attribute(o, f.name, v) + return o + + if isinstance(model, pydsdl.UnionType): + f = random.choice(model.fields) + v = make_random_object(f.data_type) + o = get_class(model)() + set_attribute(o, f.name, v) + return o + + if isinstance(model, pydsdl.DelimitedType): + return make_random_object(model.inner_type) # Unwrap and delegate + + raise TypeError(f"Unsupported type: {type(model)}") # pragma: no cover + + +def are_close(model: pydsdl.SerializableType, a: Any, b: Any) -> bool: + """ + If you ever decided to copy-paste this test function into a production application, + beware that it evaluates (NaN == NaN) as True. This is what we want when testing, + but this is not what most real systems expect. 
+ """ + from nunavut_support import get_model, get_attribute + + with numpy.errstate(invalid="ignore"): # Ignore NaNs + if a is None or b is None: # These occur, for example, in unions + return (a is None) == (b is None) + + if isinstance(model, pydsdl.CompositeType): + if type(a) != type(b): # pragma: no cover # pylint: disable=unidiomatic-typecheck + return False + for f in get_model(a).fields_except_padding: # pragma: no cover + if not are_close(f.data_type, get_attribute(a, f.name), get_attribute(b, f.name)): + return False + return True # Empty objects of same type compare equal + + if isinstance(model, pydsdl.ArrayType): + if len(a) != len(b) or a.dtype != b.dtype: # pragma: no cover + return False + if isinstance(model.element_type, pydsdl.PrimitiveType): + return bool( + numpy.allclose(a, b, equal_nan=True) + ) # Speedup for large arrays like images or point clouds + return all(itertools.starmap(functools.partial(are_close, model.element_type), zip(a, b))) + + if isinstance(model, pydsdl.FloatType): + t = { + 16: numpy.float16, + 32: numpy.float32, + 64: numpy.float64, + }[model.bit_length] + return bool(numpy.allclose(t(a), t(b), equal_nan=True)) # type: ignore + + return bool(numpy.allclose(a, b))