From 7fc09347f9e768cbc13f0c5027b243d4abffb46d Mon Sep 17 00:00:00 2001
From: Federico Busetti <729029+febus982@users.noreply.github.com>
Date: Sun, 6 Oct 2024 21:38:51 +0100
Subject: [PATCH] Make string regex compatible with AsyncAPI renderer

---
 README.md                                     |  3 +
 .../events/fields/types/_canonic_types.py     | 24 ++++---
 tests/events/test_field_types_validation.py   | 66 ++++++++++---------
 3 files changed, 53 insertions(+), 40 deletions(-)

diff --git a/README.md b/README.md
index f9e57e9..9eea88a 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,9 @@
 This is an implementation of the [CloudEvents spec](https://github.com/cloudevents/spec/tree/main) using
 [Pydantic V2](https://docs.pydantic.dev/latest/) for high performance during validation and serialization.
 
+It is meant to natively support [FastAPI](https://fastapi.tiangolo.com/)
+and [FastStream](https://faststream.airt.ai/latest/) (WIP).
+
 Currently supported bindings:
 
 | Binding | Format | Single  |  Batch  |
diff --git a/cloudevents_pydantic/events/fields/types/_canonic_types.py b/cloudevents_pydantic/events/fields/types/_canonic_types.py
index 09ef4b7..7cb6272 100644
--- a/cloudevents_pydantic/events/fields/types/_canonic_types.py
+++ b/cloudevents_pydantic/events/fields/types/_canonic_types.py
@@ -83,11 +83,9 @@ def generic_uri_validator(value: str) -> ParseResult:
 (malformed surrogate characters) but pydantic is unhappy to handle them anyway
 so we can avoid the scenario in the regex and make this faster.
 """
-str_constraint = (
-    r"^"
-    r"[^"
-    r"\u0000-\u001F\u007F-\u009F"
-    r"\uFDD0-\uFDEF\uFFFE\uFFFF"
+class_control = r"\u0000-\u001F\u007F-\u009F"
+class_nonchar_utf16_range = r"\uFDD0-\uFDEF\uFFFE\uFFFF"
+class_nonchar_utf32_range = (
     r"\u{1FFFE}\u{1FFFF}"
     r"\u{2FFFE}\u{2FFFF}"
     r"\u{3FFFE}\u{3FFFF}"
@@ -104,11 +102,21 @@ def generic_uri_validator(value: str) -> ParseResult:
     r"\u{EFFFE}\u{EFFFF}"
     r"\u{FFFFE}\u{FFFFF}"
     r"\u{10FFFE}\u{10FFFF}"
-    r"]+"
-    r""
+)
+str_constraint = (
+    r"^"
+    r"[^"
+    + class_control
+    + class_nonchar_utf16_range
+    + class_nonchar_utf32_range
+    + r"]+"
     r"$"
 )
 
+str_constraint_asyncapi_compat = (
+    r"^" r"[^" + class_control + class_nonchar_utf16_range + r"]+" r"$"
+)
+
 
 # TODO: Add types docstrings
 Boolean = Annotated[bool, PlainSerializer(bool_serializer)]
@@ -121,7 +129,7 @@ def generic_uri_validator(value: str) -> ParseResult:
 A whole number in the range -2,147,483,648 to +2,147,483,647 inclusive
 """
 
-String = Annotated[str, StringConstraints(pattern=str_constraint)]
+String = Annotated[str, StringConstraints(pattern=str_constraint_asyncapi_compat)]
 """
 Sequence of allowable Unicode characters
 """
diff --git a/tests/events/test_field_types_validation.py b/tests/events/test_field_types_validation.py
index bd0eba4..99eedb5 100644
--- a/tests/events/test_field_types_validation.py
+++ b/tests/events/test_field_types_validation.py
@@ -144,38 +144,40 @@ class StrModel(BaseModel):
         *list(map(chr, range(ord("\ufdd0"), ord("\ufdef") + 1))),
         "\ufffe",
         "\uffff",
-        "\U0001fffe",
-        "\U0001ffff",
-        "\U0002fffe",
-        "\U0002ffff",
-        "\U0003fffe",
-        "\U0003ffff",
-        "\U0004fffe",
-        "\U0004ffff",
-        "\U0005fffe",
-        "\U0005ffff",
-        "\U0006fffe",
-        "\U0006ffff",
-        "\U0007fffe",
-        "\U0007ffff",
-        "\U0008fffe",
-        "\U0008ffff",
-        "\U0009fffe",
-        "\U0009ffff",
-        "\U000afffe",
-        "\U000affff",
-        "\U000bfffe",
-        "\U000bffff",
-        "\U000cfffe",
-        "\U000cffff",
-        "\U000dfffe",
-        "\U000dffff",
-        "\U000efffe",
-        "\U000effff",
-        "\U000ffffe",
-        "\U000fffff",
-        "\U0010fffe",
-        "\U0010ffff",
+        # Re-enable these cases once the AsyncAPI regex issue is fixed:
+        # https://github.com/asyncapi/asyncapi-react/issues/1071
+        # "\U0001fffe",
+        # "\U0001ffff",
+        # "\U0002fffe",
+        # "\U0002ffff",
+        # "\U0003fffe",
+        # "\U0003ffff",
+        # "\U0004fffe",
+        # "\U0004ffff",
+        # "\U0005fffe",
+        # "\U0005ffff",
+        # "\U0006fffe",
+        # "\U0006ffff",
+        # "\U0007fffe",
+        # "\U0007ffff",
+        # "\U0008fffe",
+        # "\U0008ffff",
+        # "\U0009fffe",
+        # "\U0009ffff",
+        # "\U000afffe",
+        # "\U000affff",
+        # "\U000bfffe",
+        # "\U000bffff",
+        # "\U000cfffe",
+        # "\U000cffff",
+        # "\U000dfffe",
+        # "\U000dffff",
+        # "\U000efffe",
+        # "\U000effff",
+        # "\U000ffffe",
+        # "\U000fffff",
+        # "\U0010fffe",
+        # "\U0010ffff",
     ],
 )
 def test_string_fails_on_unicode_noncharacters(unicode_noncharacter):