Skip to content

Commit bd0571e

Browse files
committed
improved Safe_Str__Html class and add 8 new Safe_Str__Http__* classes
1 parent 40f75db commit bd0571e

18 files changed

+1366
-10
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# Matches every character that is NOT allowed in an Accept header value.
# Only space and horizontal tab are accepted as whitespace: `\s` would also let CR, LF, VT, FF
# (and Unicode whitespace) through, and CR/LF inside a header value enables header injection
# (RFC 9110, section 5.5).
TYPE_SAFE_STR__HTTP__ACCEPT__REGEX      = re.compile(r'[^a-zA-Z0-9/\-+.*,;= \t]')
TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH = 512
6+
7+
# todo: review together with the other Safe_Str__Http__* classes and see if we can't add a Safe_Str__Http__Base
#       that defines the allowed chars like this, with max_length being the main difference between subclasses.
#       Also connect this with the RFCs for the HTTP protocol, since those should provide a good set of
#       mappings for which chars are allowed in these http values
class Safe_Str__Http__Accept(Safe_Str):
    """
    Sanitised string holding the value of an HTTP Accept header.

    The value is a list of MIME types (wildcards included), each of which
    may carry parameters such as a quality weight.

    Examples: '*/*', 'application/*', 'text/html,application/json;q=0.9'
    """
    trim_whitespace = True                                              # leading/trailing whitespace is stripped
    max_length      = TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH           # longer values are rejected
    regex           = TYPE_SAFE_STR__HTTP__ACCEPT__REGEX                # pattern of characters that are not allowed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# todo: review this regex, since it still looks far too permissive for an Authorization value
#       (which is usually made up of plain ASCII characters only)

# Matches the characters to filter out: every ASCII control character except horizontal tab (0x09).
# CR (0x0D) and LF (0x0A) are deliberately included - letting them through would allow
# header injection / request splitting (RFC 9110, section 5.5).
TYPE_SAFE_STR__HTTP__AUTHORIZATION__REGEX      = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')
TYPE_SAFE_STR__HTTP__AUTHORIZATION__MAX_LENGTH = 2048
8+
9+
10+
class Safe_Str__Http__Authorization(Safe_Str):
    """
    Sanitised string holding the value of an HTTP Authorization header.

    Intended for credentials of any scheme, e.g. Bearer tokens or Basic auth.

    Examples: 'Bearer eyJ...', 'Basic dXNlcjpwYXNz'
    """
    trim_whitespace = True                                              # leading/trailing whitespace is stripped
    max_length      = TYPE_SAFE_STR__HTTP__AUTHORIZATION__MAX_LENGTH    # upper bound on the value's length
    regex           = TYPE_SAFE_STR__HTTP__AUTHORIZATION__REGEX         # pattern of characters to filter out
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# Matches every character that is NOT allowed in a Cache-Control header value.
# Only space and horizontal tab are accepted as whitespace: `\s` would also let CR, LF, VT, FF
# (and Unicode whitespace) through, and CR/LF inside a header value enables header injection
# (RFC 9110, section 5.5).
TYPE_SAFE_STR__HTTP__CACHE_CONTROL__REGEX      = re.compile(r'[^a-zA-Z0-9\-,= \t]')
TYPE_SAFE_STR__HTTP__CACHE_CONTROL__MAX_LENGTH = 256
6+
7+
class Safe_Str__Http__Cache_Control(Safe_Str):
    """
    Sanitised string holding the value of an HTTP Cache-Control header.

    Intended for standard cache directives, with or without a parameter.

    Examples: 'no-cache', 'max-age=3600', 'private, must-revalidate'
    """
    trim_whitespace = True                                              # leading/trailing whitespace is stripped
    max_length      = TYPE_SAFE_STR__HTTP__CACHE_CONTROL__MAX_LENGTH    # upper bound on the value's length
    regex           = TYPE_SAFE_STR__HTTP__CACHE_CONTROL__REGEX         # pattern of characters that are not allowed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# Matches the characters to filter out: every ASCII control character except horizontal tab (0x09).
# CR (0x0D) and LF (0x0A) are deliberately included - letting them through would allow
# header injection (RFC 9110, section 5.5).
TYPE_SAFE_STR__HTTP__COOKIE__REGEX      = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')
# todo: 32k for now, but this should really be 4096 (4k); validate this with data from live usage of this class
TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH = 32768
7+
8+
class Safe_Str__Http__Cookie(Safe_Str):
    """
    Sanitised string holding the value of an HTTP Cookie header.

    Intended for one or more name=value pairs joined by the usual separators.

    Example: 'session=abc123; user_id=456; preferences={"theme":"dark"}'
    """
    trim_whitespace = True                                              # leading/trailing whitespace is stripped
    max_length      = TYPE_SAFE_STR__HTTP__COOKIE__MAX_LENGTH           # upper bound on the value's length
    regex           = TYPE_SAFE_STR__HTTP__COOKIE__REGEX                # pattern of characters to filter out
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
TYPE_SAFE_STR__HTTP__HEADER_NAME__MAX_LENGTH = 128
TYPE_SAFE_STR__HTTP__HEADER_NAME__REGEX      = re.compile(r'[^a-zA-Z0-9\-]')       # anything that is not an ASCII letter, digit or hyphen


class Safe_Str__Http__Header__Name(Safe_Str):
    """
    Sanitised string holding an HTTP header name.

    Only ASCII letters, digits and hyphens are accepted. This is a strict
    subset of the RFC 7230 `token` grammar, which also permits characters
    such as '_' and '.'.

    HTTP/2 (RFC 7540) and HTTP/3 (RFC 9114) require header names to be
    lowercase, which is why to_lower_case is enabled.

    Common examples: content-type, authorization, user-agent, accept, cache-control
    """
    allow_empty     = True
    to_lower_case   = True
    trim_whitespace = True                                              # leading/trailing whitespace is stripped
    max_length      = TYPE_SAFE_STR__HTTP__HEADER_NAME__MAX_LENGTH      # upper bound on the name's length
    regex           = TYPE_SAFE_STR__HTTP__HEADER_NAME__REGEX           # pattern of characters that are not allowed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# Matches the characters to filter out: every ASCII control character except horizontal tab (0x09).
# The previous pattern skipped 0x0A (LF) and 0x0D (CR) as well, which contradicted the
# "except tab" contract and let CR/LF through - the ingredients of header injection /
# response splitting (RFC 9110, section 5.5).
TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX      = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')
TYPE_SAFE_STR__HTTP__HEADER_VALUE__MAX_LENGTH = 8192
6+
7+
class Safe_Str__Http__Header__Value(Safe_Str):
    """
    Sanitised string holding the value of an arbitrary HTTP header.

    Visible characters and spaces are accepted, and horizontal tab (0x09)
    is kept. Control characters such as NUL, ESC and DEL are filtered.
    TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX defines the exact filtered set.
    """
    trim_whitespace = True                                              # leading/trailing whitespace is stripped
    max_length      = TYPE_SAFE_STR__HTTP__HEADER_VALUE__MAX_LENGTH     # upper bound on the value's length
    regex           = TYPE_SAFE_STR__HTTP__HEADER_VALUE__REGEX          # pattern of characters to filter out
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# note: as with the other Safe_Str__Http__* classes, check whether this regex is overly permissive

# Matches the characters to filter out: every ASCII control character except horizontal tab (0x09).
# CR (0x0D) and LF (0x0A) are deliberately included - a redirect target carrying CR/LF is the
# classic HTTP response-splitting vector (RFC 9110, section 5.5).
TYPE_SAFE_STR__HTTP__LOCATION__REGEX      = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')
TYPE_SAFE_STR__HTTP__LOCATION__MAX_LENGTH = 2048
7+
8+
class Safe_Str__Http__Location(Safe_Str):
    """
    Sanitised string holding the value of an HTTP Location header.

    This is the redirect URL sent with 3xx responses.

    Example: 'https://example.com/new-page', '/relative/path'
    """
    trim_whitespace = True                                              # leading/trailing whitespace is stripped
    max_length      = TYPE_SAFE_STR__HTTP__LOCATION__MAX_LENGTH         # upper bound on the value's length
    regex           = TYPE_SAFE_STR__HTTP__LOCATION__REGEX              # pattern of characters to filter out
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
3+
4+
# todo: review this regex: should we be allowing any non-text values here at all?

# Matches the characters to filter out: every ASCII control character except horizontal tab (0x09).
# CR (0x0D) and LF (0x0A) are deliberately included - letting them through would allow
# header injection (RFC 9110, section 5.5).
TYPE_SAFE_STR__HTTP__USER_AGENT__REGEX      = re.compile(r'[\x00-\x08\x0A-\x1F\x7F]')
TYPE_SAFE_STR__HTTP__USER_AGENT__MAX_LENGTH = 512
7+
8+
class Safe_Str__Http__User_Agent(Safe_Str):
    """
    Sanitised string holding the value of an HTTP User-Agent header.

    Intended for ordinary user-agent strings, which use a wide range of
    punctuation characters.

    Example: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    """
    trim_whitespace = True                                              # leading/trailing whitespace is stripped
    max_length      = TYPE_SAFE_STR__HTTP__USER_AGENT__MAX_LENGTH       # upper bound on the value's length
    regex           = TYPE_SAFE_STR__HTTP__USER_AGENT__REGEX            # pattern of characters to filter out

osbot_utils/type_safe/primitives/domains/web/safe_str/Safe_Str__Html.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,32 @@
22
from osbot_utils.type_safe.primitives.core.Safe_Str import Safe_Str
33

44
# Define the size constant
5-
TYPE_SAFE_STR__HTML__MAX_LENGTH = 1048576 # 1 megabyte in bytes
65

7-
# A minimal regex that only filters out:
8-
# - NULL byte (U+0000)
9-
# - Control characters (U+0001 to U+0008, U+000B to U+000C, U+000E to U+001F)
10-
# We explicitly allow:
11-
# - Tab (U+0009), Line Feed (U+000A), and Carriage Return (U+000D)
12-
# - All other Unicode characters
13-
TYPE_SAFE_STR__HTML__REGEX = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F]')
6+
# Safe string class for HTML content with a 10MB limit.
7+
# Allows HTML tags, attributes, and all characters needed for valid HTML,
8+
# while filtering out control characters and NULL bytes that could cause
9+
# security issues or rendering problems.
10+
#
11+
# This is specifically for HTML content (not general text), so it:
12+
# - Allows angle brackets < > for tags
13+
# - Allows quotes " ' for attributes
14+
# - Preserves tabs and newlines for formatting
15+
# - Has a large 10MB limit for full HTML documents
16+
# - Trims outer whitespace but preserves internal formatting
17+
18+
TYPE_SAFE_STR__HTML__REGEX      = re.compile(r'[\x00\x01-\x08\x0B\x0C\x0E-\x1F\x7F]')   # NULL, DEL and control chars; tab, LF and CR are not matched
TYPE_SAFE_STR__HTML__MAX_LENGTH = 10485760                                              # 10 * 1024 * 1024, sized for large HTML documents


class Safe_Str__Html(Safe_Str):
    """
    Safe string for HTML content.

    Angle brackets, quotes, tabs and newlines are all kept. Only the
    control characters matched by TYPE_SAFE_STR__HTML__REGEX are filtered.
    Outer whitespace is trimmed, and when normalize_newlines is set,
    CRLF and lone CR line endings are rewritten to LF before the value
    reaches the base class.
    """
    regex              = TYPE_SAFE_STR__HTML__REGEX
    max_length         = TYPE_SAFE_STR__HTML__MAX_LENGTH
    normalize_newlines = True                                           # Normalize different newline styles
    trim_whitespace    = True                                           # Trim leading/trailing whitespace

    def __new__(cls, value=None):
        # isinstance() is already False for None, so no separate None check is needed
        if isinstance(value, str) and cls.normalize_newlines:
            without_crlf = value.replace('\r\n', '\n')                  # CRLF first, so it does not become two newlines
            value        = without_crlf.replace('\r', '\n')             # then any remaining lone CR

        return super().__new__(cls, value)
33+
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import pytest
2+
from unittest import TestCase
3+
from osbot_utils.type_safe.primitives.domains.http.safe_str.Safe_Str__Http__Accept import Safe_Str__Http__Accept
4+
5+
6+
class test_Safe_Str__Http__Accept(TestCase):
    """Checks for Safe_Str__Http__Accept: pass-through of valid values, sanitising, trimming and the length limit."""

    @staticmethod
    def _assert_round_trips(*values):                                           # Every value must come back exactly as it went in
        for value in values:
            assert Safe_Str__Http__Accept(value) == value

    @staticmethod
    def _assert_converts(pairs):                                                # Every (raw, expected) pair must hold
        for raw, expected in pairs:
            assert Safe_Str__Http__Accept(raw) == expected

    def test__init__(self):                                                     # Test Safe_Str__Http__Accept initialization
        expected = 'application/json'
        accept   = Safe_Str__Http__Accept(expected)
        assert type(accept) is Safe_Str__Http__Accept
        assert str(accept)  == expected
        assert accept       == expected

    def test__simple_mime_types(self):                                          # Test simple MIME type values
        self._assert_round_trips('text/html'             ,
                                 'application/json'      ,
                                 'application/xml'       ,
                                 'image/png'             ,
                                 'image/jpeg'            ,
                                 'video/mp4'             ,
                                 'audio/mpeg'            ,
                                 'text/plain'            ,
                                 'text/css'              ,
                                 'application/javascript')

    def test__wildcard_types(self):                                             # Test wildcard MIME types
        self._assert_round_trips('*/*', 'text/*', 'application/*', 'image/*', 'audio/*', 'video/*')

    def test__quality_parameters(self):                                         # Test quality (q) parameter values
        self._assert_round_trips('text/html;q=0.9'       ,
                                 'application/json;q=1.0',
                                 '*/*;q=0.8'             ,
                                 'text/plain;q=0.5'      ,
                                 'image/webp;q=0.95'     )

    def test__multiple_mime_types(self):                                        # Test multiple MIME types in one header
        self._assert_round_trips('text/html,application/json'                ,
                                 'text/html, application/json'               ,
                                 'text/html,application/xml,application/json')

    def test__complex_accept_headers(self):                                     # Test complex real-world Accept headers
        browser_accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        api_accept     = 'application/json, text/plain, */*'
        chrome_accept  = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8'
        self._assert_round_trips(browser_accept, api_accept, chrome_accept)

    def test__with_charset_parameter(self):                                     # Test MIME types with charset parameter
        self._assert_round_trips('text/html; charset=utf-8'       ,
                                 'application/json; charset=utf-8',
                                 'text/plain; charset=iso-8859-1' )

    def test__vendor_specific_mime_types(self):                                 # Test vendor-specific MIME types
        self._assert_round_trips('application/vnd.api+json'      ,
                                 'application/vnd.ms-excel'      ,
                                 'application/ld+json'           ,
                                 'application/vnd.github.v3+json')

    def test__decimal_quality_values(self):                                     # Test various quality value decimal formats
        self._assert_round_trips('text/html;q=1'    ,
                                 'text/html;q=0.9'  ,
                                 'text/html;q=0.99' ,
                                 'text/html;q=0.999',
                                 'text/html;q=0'    )

    def test__level_parameters(self):                                           # Test level parameter in Accept headers
        self._assert_round_trips('text/html; level=1', 'text/html; level=2; q=0.9')

    def test__whitespace_handling(self):                                        # Test trim_whitespace = True
        self._assert_converts([('  text/html  '     , 'text/html'       ),
                               ('application/json  ', 'application/json'),
                               ('  */*'             , '*/*'             )])

    def test__numeric_conversion(self):                                         # Test conversion from numeric types
        self._assert_converts([(12345, '12345'),
                               (999  , '999'  )])

    def test__invalid_characters(self):                                         # Test regex character replacement
        self._assert_converts([('text/html<script>', 'text/html_script_'),
                               ('text@html'        , 'text_html'        ),
                               ('text:html'        , 'text_html'        ),
                               ('application#json' , 'application_json' )])

    def test__empty_values(self):                                               # Test allow_empty = True
        for raw in (None, '', '   '):                                           # Spaces only will be trimmed
            assert Safe_Str__Http__Accept(raw) == ''

    def test__max_length(self):                                                 # Test TYPE_SAFE_STR__HTTP__ACCEPT__MAX_LENGTH = 512
        limit    = 512
        at_limit = 'a' * limit
        too_long = 'a' * (limit + 1)

        assert Safe_Str__Http__Accept(at_limit) == at_limit

        with pytest.raises(ValueError) as exc_info:
            Safe_Str__Http__Accept(too_long)
        assert "in Safe_Str__Http__Accept, value exceeds maximum length of 512" in str(exc_info.value)

    def test__special_subtypes(self):                                           # Test special MIME subtype formats
        self._assert_round_trips('application/x-www-form-urlencoded',
                                 'multipart/form-data'              ,
                                 'text/event-stream'                ,
                                 'application/octet-stream'         )

    def test__image_formats(self):                                              # Test various image format MIME types
        self._assert_round_trips('image/png'    ,
                                 'image/jpeg'   ,
                                 'image/gif'    ,
                                 'image/webp'   ,
                                 'image/svg+xml',
                                 'image/avif'   ,
                                 'image/apng'   )

    def test__str_and_repr(self):                                               # Test string representations
        accept = Safe_Str__Http__Accept('application/json')

        assert str(accept)         == 'application/json'
        assert f"{accept}"         == 'application/json'
        assert f"Accept: {accept}" == 'Accept: application/json'

0 commit comments

Comments
 (0)