From 7fc09347f9e768cbc13f0c5027b243d4abffb46d Mon Sep 17 00:00:00 2001 From: Federico Busetti <729029+febus982@users.noreply.github.com> Date: Sun, 6 Oct 2024 21:38:51 +0100 Subject: [PATCH] Make string regex compatible with AsyncAPI renderer --- README.md | 3 + .../events/fields/types/_canonic_types.py | 24 ++++--- tests/events/test_field_types_validation.py | 66 ++++++++++--------- 3 files changed, 53 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index f9e57e9..9eea88a 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,9 @@ This is an implementation of the [CloudEvents spec](https://github.com/cloudevents/spec/tree/main) using [Pydantic V2](https://docs.pydantic.dev/latest/) for high performance during validation and serialization. +It is meant to support natively [FastAPI](https://fastapi.tiangolo.com/) +and [FastStream](https://faststream.airt.ai/latest/) (WIP) + Currently supported bindings: | Binding | Format | Single | Batch | diff --git a/cloudevents_pydantic/events/fields/types/_canonic_types.py b/cloudevents_pydantic/events/fields/types/_canonic_types.py index 09ef4b7..7cb6272 100644 --- a/cloudevents_pydantic/events/fields/types/_canonic_types.py +++ b/cloudevents_pydantic/events/fields/types/_canonic_types.py @@ -83,11 +83,9 @@ def generic_uri_validator(value: str) -> ParseResult: (malformed surrogate characters) but pydantic is unhappy to handle them anyway so we can avoid the scenario in the regex and make this faster. """ -str_constraint = ( - r"^" - r"[^" - r"\u0000-\u001F\u007F-\u009F" - r"\uFDD0-\uFDEF\uFFFE\uFFFF" +class_control = r"\u0000-\u001F\u007F-\u009F" +class_nonchar_utf16_range = r"\uFDD0-\uFDEF\uFFFE\uFFFF" +class_nonchar_utf32_range = ( r"\u{1FFFE}\u{1FFFF}" r"\u{2FFFE}\u{2FFFF}" r"\u{3FFFE}\u{3FFFF}" @@ -104,11 +102,21 @@ def generic_uri_validator(value: str) -> ParseResult: r"\u{EFFFE}\u{EFFFF}" r"\u{FFFFE}\u{FFFFF}" r"\u{10FFFE}\u{10FFFF}" - r"]+" - r"" +) +str_constraint = ( + r"^" + r"[^" + + class_control + + class_nonchar_utf16_range + + class_nonchar_utf32_range + + r"]+" r"$" ) +str_constraint_asyncapi_compat = ( + r"^" r"[^" + class_control + class_nonchar_utf16_range + r"]+" r"$" +) + # TODO: Add types docstrings Boolean = Annotated[bool, PlainSerializer(bool_serializer)] @@ -121,7 +129,7 @@ def generic_uri_validator(value: str) -> ParseResult: A whole number in the range -2,147,483,648 to +2,147,483,647 inclusive """ -String = Annotated[str, StringConstraints(pattern=str_constraint)] +String = Annotated[str, StringConstraints(pattern=str_constraint_asyncapi_compat)] """ Sequence of allowable Unicode characters """ diff --git a/tests/events/test_field_types_validation.py b/tests/events/test_field_types_validation.py index bd0eba4..99eedb5 100644 --- a/tests/events/test_field_types_validation.py +++ b/tests/events/test_field_types_validation.py @@ -144,38 +144,40 @@ class StrModel(BaseModel): *list(map(chr, range(ord("\ufdd0"), ord("\ufdef") + 1))), "\ufffe", "\uffff", - "\U0001fffe", - "\U0001ffff", - "\U0002fffe", - "\U0002ffff", - "\U0003fffe", - "\U0003ffff", - "\U0004fffe", - "\U0004ffff", - "\U0005fffe", - "\U0005ffff", - "\U0006fffe", - "\U0006ffff", - "\U0007fffe", - "\U0007ffff", - "\U0008fffe", - "\U0008ffff", - "\U0009fffe", - "\U0009ffff", - "\U000afffe", - "\U000affff", - "\U000bfffe", - "\U000bffff", - "\U000cfffe", - "\U000cffff", - "\U000dfffe", - "\U000dffff", - "\U000efffe", - "\U000effff", - "\U000ffffe", - "\U000fffff", - "\U0010fffe", - "\U0010ffff", + # These are to be enabled when AsyncAPI regex issue is fixed + # https://github.com/asyncapi/asyncapi-react/issues/1071 + # "\U0001fffe", + # "\U0001ffff", + # "\U0002fffe", + # "\U0002ffff", + # "\U0003fffe", + # "\U0003ffff", + # "\U0004fffe", + # "\U0004ffff", + # "\U0005fffe", + # "\U0005ffff", + # "\U0006fffe", + # "\U0006ffff", + # "\U0007fffe", + # "\U0007ffff", + # "\U0008fffe", + # "\U0008ffff", + # "\U0009fffe", + # "\U0009ffff", + # "\U000afffe", + # "\U000affff", + # "\U000bfffe", + # "\U000bffff", + # "\U000cfffe", + # "\U000cffff", + # "\U000dfffe", + # "\U000dffff", + # "\U000efffe", + # "\U000effff", + # "\U000ffffe", + # "\U000fffff", + # "\U0010fffe", + # "\U0010ffff", ], ) def test_string_fails_on_unicode_noncharacters(unicode_noncharacter):