owasp-sbot
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Accept.py‎
Lines changed: 19 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Accept.py‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Authorization.py‎
Lines changed: 18 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Authorization.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Cache_Control.py‎
Lines changed: 15 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Cache_Control.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Cookie.py‎
Lines changed: 16 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Cookie.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Header__Name.py‎
Lines changed: 18 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Header__Name.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Header__Value.py‎
Lines changed: 15 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Header__Value.py‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Location.py‎
Lines changed: 16 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__Location.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__User_Agent.py‎
Lines changed: 16 additions & 0 deletions b/‎osbot_utils/type_safe/primitives/domains/http/safe_str/Safe_Str__Http__User_Agent.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎osbot_utils/type_safe/primitives/domains/web/safe_str/Safe_Str__Html.py‎
Lines changed: 25 additions & 9 deletions b/‎osbot_utils/type_safe/primitives/domains/web/safe_str/Safe_Str__Html.py‎
Lines changed: 25 additions & 9 deletions
diff --git a/‎tests/unit/type_safe/primitives/domains/http/safe_str/test_Safe_Str__Http__Accept.py‎
Lines changed: 128 additions & 0 deletions b/‎tests/unit/type_safe/primitives/domains/http/safe_str/test_Safe_Str__Http__Accept.py‎
Lines changed: 128 additions & 0 deletions
@@ -0,0 +1,19 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__ACCEPT__REGEX      = re.compile(r'[^a-zA-Z0-9/\-+.*,;= \t]')     # allow-list; only space and tab count as whitespace (no CR/LF/Unicode spaces)
+TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH = 512
+
+# todo: review with other http safe_str classes and see if we can add a Safe_Str__Http__Base that
+#       defines chars like this, with the max length being the main difference
+#       also, connect this with the RFC for the HTTP protocol, since that one should be providing a good set of
+#       mappings for what chars are allowed in these http values
+class Safe_Str__Http__Accept(Safe_Str):
+    """
+    Safe string class for HTTP Accept header values.
+    Allows MIME types with quality parameters.
+    Examples: 'text/html,application/json;q=0.9', 'application/*', '*/*'
+
+    Whitespace inside the value is limited to space and horizontal tab, so
+    CR, LF and Unicode whitespace are handled like any other disallowed
+    character instead of being passed through into a header line.
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__ACCEPT__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,18 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+# todo: review this REGEX, since it looks far too permissive for an Auth string (which is usually just ascii values)
+
+TYPE_SAFE_STR__HTTP__AUTHORIZATION__REGEX = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')  # Filter control chars (including CR/LF), tab is kept
+TYPE_SAFE_STR__HTTP__AUTHORIZATION__MAX_LENGTH = 2048
+
+
+class Safe_Str__Http__Authorization(Safe_Str):
+    """
+    Safe string class for HTTP Authorization header values.
+    Supports Bearer tokens, Basic auth, and other auth schemes.
+    Examples: 'Bearer eyJ...', 'Basic dXNlcjpwYXNz'
+
+    The pattern matches every ASCII control character except horizontal tab.
+    CR and LF are included so that a value can never carry a line break
+    (which would allow header injection when the value is written to the wire).
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__AUTHORIZATION__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__AUTHORIZATION__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,15 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__CACHE_CONTROL__REGEX      = re.compile(r'[^a-zA-Z0-9\-,= \t]')   # allow-list; only space and tab count as whitespace (no CR/LF/Unicode spaces)
+TYPE_SAFE_STR__HTTP__CACHE_CONTROL__MAX_LENGTH = 256
+
+class Safe_Str__Http__Cache_Control(Safe_Str):
+    """
+    Safe string class for HTTP Cache-Control header values.
+    Allows standard cache directives with parameters.
+    Examples: 'no-cache', 'max-age=3600', 'private, must-revalidate'
+
+    Whitespace inside the value is limited to space and horizontal tab, so
+    CR, LF and Unicode whitespace are handled like any other disallowed
+    character instead of being passed through into a header line.
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__CACHE_CONTROL__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__CACHE_CONTROL__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,16 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__COOKIE__REGEX = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')  # Filter control chars (including CR/LF), tab is kept
+# todo: 32k for now; this should really be 4k (4096), but validate that with data from live usage of this class first
+TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH = 32768
+
+class Safe_Str__Http__Cookie(Safe_Str):
+    """
+    Safe string class for HTTP Cookie header values.
+    Allows cookie name-value pairs with standard separators.
+    Example: 'session=abc123; user_id=456; preferences={"theme":"dark"}'
+
+    The pattern matches every ASCII control character except horizontal tab.
+    CR and LF are included so that a cookie value can never carry a line
+    break into the header it is written to.
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__COOKIE__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,18 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__HEADER_NAME__MAX_LENGTH = 128
+TYPE_SAFE_STR__HTTP__HEADER_NAME__REGEX      = re.compile(r'[^a-zA-Z0-9\-]')          # allow-list: ASCII letters, digits and hyphen
+
+class Safe_Str__Http__Header__Name(Safe_Str):
+    """
+    Safe string class for HTTP header names.
+
+    Only ASCII alphanumerics and hyphens are on the allow-list. This is a
+    deliberately strict subset of the token characters that RFC 7230 permits
+    in a field name.
+    Lower-casing is enabled because HTTP/2 (RFC 7540) and HTTP/3 (RFC 9114)
+    require header names to be lowercase.
+    Common examples: content-type, authorization, user-agent, accept, cache-control
+    """
+    allow_empty                = True
+    to_lower_case              = True
+    trim_whitespace            = True
+    max_length                 = TYPE_SAFE_STR__HTTP__HEADER_NAME__MAX_LENGTH
+    regex                      = TYPE_SAFE_STR__HTTP__HEADER_NAME__REGEX
@@ -0,0 +1,15 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')  # Filter control chars except tab (CR and LF are filtered too)
+TYPE_SAFE_STR__HTTP__HEADER_VALUE__MAX_LENGTH = 8192
+
+class Safe_Str__Http__Header__Value(Safe_Str):
+    """
+    Safe string class for HTTP header values.
+    Filters out ASCII control characters except tab (0x09); this includes
+    CR (0x0D) and LF (0x0A), so a value cannot span or inject header lines.
+    Visible ASCII and spaces are kept; characters above 0x7F are not matched
+    by this pattern either.
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__HEADER_VALUE__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,16 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+# note: as with the others, check if this regex is over permissive
+TYPE_SAFE_STR__HTTP__LOCATION__REGEX      = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')  # Filter control chars (including CR/LF), tab is kept
+TYPE_SAFE_STR__HTTP__LOCATION__MAX_LENGTH = 2048
+
+class Safe_Str__Http__Location(Safe_Str):
+    """
+    Safe string class for HTTP Location header values (redirect URLs).
+    Used in redirect responses (3xx status codes).
+    Example: 'https://example.com/new-page', '/relative/path'
+
+    The pattern matches every ASCII control character except horizontal tab.
+    CR and LF are included so that a redirect target can never carry a line
+    break (the usual vector for response splitting via the Location header).
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__LOCATION__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__LOCATION__MAX_LENGTH
+    trim_whitespace            = True
@@ -0,0 +1,16 @@
+import re
+from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
+
+# todo: review this regex: should we be allowing any non-text values here?
+TYPE_SAFE_STR__HTTP__USER_AGENT__REGEX      = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')  # Filter control chars (including CR/LF), tab is kept
+TYPE_SAFE_STR__HTTP__USER_AGENT__MAX_LENGTH = 512
+
+class Safe_Str__Http__User_Agent(Safe_Str):
+    """
+    Safe string class for HTTP User-Agent header values.
+    Allows standard user agent strings with various characters.
+    Example: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+
+    The pattern matches every ASCII control character except horizontal tab.
+    CR and LF are included so that a user agent string can never carry a
+    line break into a header or a log line.
+    """
+    regex                      = TYPE_SAFE_STR__HTTP__USER_AGENT__REGEX
+    max_length                 = TYPE_SAFE_STR__HTTP__USER_AGENT__MAX_LENGTH
+    trim_whitespace            = True
@@ -2,16 +2,32 @@
 from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
 
 # Define the size constant
-TYPE_SAFE_STR__HTML__MAX_LENGTH = 1048576  # 1 megabyte in bytes
 
-# A minimal regex that only filters out:
-# - NULL byte (U+0000)
-# - Control characters (U+0001 to U+0008, U+000B to U+000C, U+000E to U+001F)
-# We explicitly allow:
-# - Tab (U+0009), Line Feed (U+000A), and Carriage Return (U+000D)
-# - All other Unicode characters
-TYPE_SAFE_STR__HTML__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F]')
+# Constants for the HTML safe string type.
+#
+# The type is meant for HTML content specifically (not general text), so the
+# filter below leaves everything needed for valid markup untouched:
+# - angle brackets < > for tags
+# - quotes " ' for attributes
+# - tab, line feed and carriage return for formatting
+# Only NUL, the remaining C0 control characters and DEL (0x7F) are matched,
+# since those could cause security issues or rendering problems.
+#
+# The size limit is large (10MB) so that full HTML documents fit.
+# NOTE(review): presumably compared against the string length - confirm whether that means characters or bytes.
+
+TYPE_SAFE_STR__HTML__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F\x7F]')
+TYPE_SAFE_STR__HTML__MAX_LENGTH = 10 * 1024 * 1024  # 10 megabytes (for large HTML documents)
+
 
 class Safe_Str__Html(Safe_Str):
     max_length                 = TYPE_SAFE_STR__HTML__MAX_LENGTH
-    regex                      = TYPE_SAFE_STR__HTML__REGEX
+    regex                      = TYPE_SAFE_STR__HTML__REGEX
+    trim_whitespace             = True                          # Trim leading/trailing whitespace
+    normalize_newlines          = True                          # Normalize different newline styles
+
+    def __new__(cls, value=None):
+        # Rewrite \r\n and lone \r line endings as \n before handing the value to Safe_Str.__new__.
+        # Non-string values (including None) are passed through untouched.
+        newline_fix_enabled = cls.normalize_newlines
+        if newline_fix_enabled and isinstance(value, str):
+            for line_ending in ('\r\n', '\r'):                                          # \r\n first, so it becomes a single \n
+                value = value.replace(line_ending, '\n')
+        return super().__new__(cls, value)
+
@@ -0,0 +1,128 @@
+import pytest
+from unittest                                                                   import TestCase
+from osbot_utils.type_safe.primitives.domains.http.safe_str.Safe_Str__Http__Accept import Safe_Str__Http__Accept
+
+
+class test_Safe_Str__Http__Accept(TestCase):
+
+    def test__init__(self):                                                             # Construction keeps the value and the concrete type
+        header_value = Safe_Str__Http__Accept('application/json')
+        assert type(header_value) is Safe_Str__Http__Accept
+        assert str(header_value)  == 'application/json'
+        assert header_value       == 'application/json'
+
+    def test__simple_mime_types(self):                                                  # Plain type/subtype values pass through unchanged
+        unchanged = ('text/html'       , 'application/json', 'application/xml',
+                     'image/png'       , 'image/jpeg'      , 'video/mp4'      ,
+                     'audio/mpeg'      , 'text/plain'      , 'text/css'       ,
+                     'application/javascript')
+        for mime_type in unchanged:
+            assert Safe_Str__Http__Accept(mime_type) == mime_type
+
+    def test__wildcard_types(self):                                                     # Wildcards in type and/or subtype position
+        for mime_type in ('*/*', 'text/*', 'application/*', 'image/*', 'audio/*', 'video/*'):
+            assert Safe_Str__Http__Accept(mime_type) == mime_type
+
+    def test__quality_parameters(self):                                                 # ;q= weights are preserved
+        weighted = ('text/html;q=0.9' , 'application/json;q=1.0', '*/*;q=0.8',
+                    'text/plain;q=0.5', 'image/webp;q=0.95'     )
+        for mime_type in weighted:
+            assert Safe_Str__Http__Accept(mime_type) == mime_type
+
+    def test__multiple_mime_types(self):                                                # Comma separated lists, with and without a space
+        lists = ('text/html,application/json'                ,
+                 'text/html, application/json'               ,
+                 'text/html,application/xml,application/json')
+        for mime_list in lists:
+            assert Safe_Str__Http__Accept(mime_list) == mime_list
+
+    def test__complex_accept_headers(self):                                             # Real-world browser and API client headers
+        real_world = dict(browser = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+                          api     = 'application/json, text/plain, */*',
+                          chrome  = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8')
+        for header in real_world.values():
+            assert Safe_Str__Http__Accept(header) == header
+
+    def test__with_charset_parameter(self):                                             # charset= media type parameter
+        with_charset = ('text/html; charset=utf-8'       ,
+                        'application/json; charset=utf-8',
+                        'text/plain; charset=iso-8859-1' )
+        for mime_type in with_charset:
+            assert Safe_Str__Http__Accept(mime_type) == mime_type
+
+    def test__vendor_specific_mime_types(self):                                         # vnd. trees and +suffix subtypes
+        vendor_types = ('application/vnd.api+json', 'application/vnd.ms-excel',
+                        'application/ld+json'     , 'application/vnd.github.v3+json')
+        for mime_type in vendor_types:
+            assert Safe_Str__Http__Accept(mime_type) == mime_type
+
+    def test__decimal_quality_values(self):                                             # q values with zero to three decimals
+        for weight in ('1', '0.9', '0.99', '0.999', '0'):
+            header = 'text/html;q=' + weight
+            assert Safe_Str__Http__Accept(header) == header
+
+    def test__level_parameters(self):                                                   # level= media type parameter
+        for header in ('text/html; level=1', 'text/html; level=2; q=0.9'):
+            assert Safe_Str__Http__Accept(header) == header
+
+    def test__whitespace_handling(self):                                                # trim_whitespace = True strips both ends
+        padded_to_trimmed = {'  text/html  '     : 'text/html'       ,
+                             'application/json  ': 'application/json',
+                             '  */*'             : '*/*'             }
+        for padded, trimmed in padded_to_trimmed.items():
+            assert Safe_Str__Http__Accept(padded) == trimmed
+
+    def test__numeric_conversion(self):                                                 # Numbers are converted to their string form
+        for number, as_text in {12345: '12345', 999: '999'}.items():
+            assert Safe_Str__Http__Accept(number) == as_text
+
+    def test__invalid_characters(self):                                                 # Chars outside the allow-list become '_'
+        raw_to_sanitised = {'text/html<script>': 'text/html_script_',
+                            'text@html'        : 'text_html'        ,
+                            'text:html'        : 'text_html'        ,
+                            'application#json' : 'application_json' }
+        for raw, sanitised in raw_to_sanitised.items():
+            assert Safe_Str__Http__Accept(raw) == sanitised
+
+    def test__empty_values(self):                                                       # None, '' and whitespace-only all give ''
+        for empty_like in (None, '', '   '):                                            # the spaces-only value is trimmed
+            assert Safe_Str__Http__Accept(empty_like) == ''
+
+    def test__max_length(self):                                                         # Limit is TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH = 512
+        at_limit   = 'a' * 512
+        over_limit = 'a' * 513
+
+        assert Safe_Str__Http__Accept(at_limit) == at_limit
+
+        with pytest.raises(ValueError) as exc_info:
+            Safe_Str__Http__Accept(over_limit)
+        assert "in Safe_Str__Http__Accept, value exceeds maximum length of 512" in str(exc_info.value)
+
+    def test__special_subtypes(self):                                                   # Subtypes containing hyphens
+        hyphenated = ('application/x-www-form-urlencoded', 'multipart/form-data',
+                      'text/event-stream'                , 'application/octet-stream')
+        for mime_type in hyphenated:
+            assert Safe_Str__Http__Accept(mime_type) == mime_type
+
+    def test__image_formats(self):                                                      # Common image MIME types
+        for subtype in ('png', 'jpeg', 'gif', 'webp', 'svg+xml', 'avif', 'apng'):
+            mime_type = 'image/' + subtype
+            assert Safe_Str__Http__Accept(mime_type) == mime_type
+
+    def test__str_and_repr(self):                                                       # str() and f-string formatting
+        header_value = Safe_Str__Http__Accept('application/json')
+
+        assert str(header_value)            == 'application/json'
+        assert f"{header_value}"            == 'application/json'
+        assert f"Accept: {header_value}"    == 'Accept: application/json'