fix: Limit str usage in beautifulsoup kw arguments

- Use warnings.deprecated to block str for exclude_encodings arg
- Exhaustively list possible Literal constants in features arg
abelcheung · Oct 22, 2024 · 68facbc · 68facbc
1 parent bc8798b
commit 68facbc
Showing 1 changed file with 55 additions and 18 deletions.
diff --git a/lxml-stubs/html/soupparser.pyi b/lxml-stubs/html/soupparser.pyi
@@ -1,4 +1,5 @@
-from typing import Any, Sequence, overload
+import sys
+from typing import Any, Collection, Iterable, Literal, overload
 
 from _typeshed import SupportsRead
 from bs4 import BeautifulSoup, PageElement, SoupStrainer
@@ -8,29 +9,58 @@ from .._types import _ET, _AnyStr, _ElementFactory
 from ..etree import _ElementTree
 from . import HtmlElement
 
+if sys.version_info >= (3, 11):
+    from typing import Never
+else:
+    from typing_extensions import Never
+
+if sys.version_info >= (3, 13):
+    from warnings import deprecated
+else:
+    from typing_extensions import deprecated
+
+_Features = Literal[
+    "fast",
+    "permissive",
+    "strict",
+    "xml",
+    "html",
+    "html5",
+    "html5lib",
+    "html.parser",
+    "lxml-xml",
+    "lxml",
+    "lxml-html",
+]
+
 # NOTES:
 # - kw only arguments for fromstring() and parse() are
 #   taken from types-beautifulsoup4
-# - annotation for 'features' argument should have been
-#
-#       features: str | Sequence[str] | None = None
-#
-#   but current modification is much more helpful for users
+# - The default value for the 'features' argument should have been None,
+#   but the current modification is much more helpful for code
+#   writers; they don't need to look up the source to see how lxml behaves
 # - makeelement argument provides very exotic feature:
 #   it's actually possible to convert BeautifulSoup html tree
 #   into lxml XML element tree, not just lxml html tree
 
+@overload  # guard against plain string in exclude_encodings
+@deprecated("Use a collection of encoding, not a vanilla encoding string")
+def fromstring(
+    *args: Any,
+    exclude_encodings: str,
+    **kw: Any,
+) -> Never: ...
 @overload  # makeelement is positional
 def fromstring(
     data: _AnyStr | SupportsRead[str] | SupportsRead[bytes],
     beautifulsoup: type[BeautifulSoup] | None,
     makeelement: _ElementFactory[_ET],
     *,
-    features: str | Sequence[str] = "html.parser",
+    features: _Features | Collection[_Features] = "html.parser",
     builder: TreeBuilder | type[TreeBuilder] | None = None,
     parse_only: SoupStrainer | None = None,
     from_encoding: str | None = None,
-    exclude_encodings: Sequence[str] | None = None,
+    exclude_encodings: Iterable[str] | None = None,
     element_classes: dict[type[PageElement], type[Any]] | None = None,
 ) -> _ET: ...
 @overload  # makeelement is kw
@@ -39,11 +69,11 @@ def fromstring(
     beautifulsoup: type[BeautifulSoup] | None = None,
     *,
     makeelement: _ElementFactory[_ET],
-    features: str | Sequence[str] = "html.parser",
+    features: _Features | Collection[_Features] = "html.parser",
     builder: TreeBuilder | type[TreeBuilder] | None = None,
     parse_only: SoupStrainer | None = None,
     from_encoding: str | None = None,
-    exclude_encodings: Sequence[str] | None = None,
+    exclude_encodings: Iterable[str] | None = None,
     element_classes: dict[type[PageElement], type[Any]] | None = None,
 ) -> _ET: ...
 @overload  # makeelement not provided or is default
@@ -52,27 +82,34 @@ def fromstring(
     beautifulsoup: type[BeautifulSoup] | None = None,
     makeelement: None = None,
     *,
-    features: str | Sequence[str] = "html.parser",
+    features: _Features | Collection[_Features] = "html.parser",
     builder: TreeBuilder | type[TreeBuilder] | None = None,
     parse_only: SoupStrainer | None = None,
     from_encoding: str | None = None,
-    exclude_encodings: Sequence[str] | None = None,
+    exclude_encodings: Iterable[str] | None = None,
     element_classes: dict[type[PageElement], type[Any]] | None = None,
 ) -> HtmlElement: ...
 
 # Technically Path is also accepted for parse() file argument
 # but emits visible warning
+@overload  # guard against plain string in exclude_encodings
+@deprecated("Use encoding collection or iterator, not a vanilla encoding string")
+def parse(
+    *args: Any,
+    exclude_encodings: str,
+    **kw: Any,
+) -> Never: ...
 @overload  # makeelement is positional
 def parse(
     file: _AnyStr | SupportsRead[str] | SupportsRead[bytes],
     beautifulsoup: type[BeautifulSoup] | None,
     makeelement: _ElementFactory[_ET],
     *,
-    features: str | Sequence[str] = "html.parser",
+    features: _Features | Collection[_Features] = "html.parser",
     builder: TreeBuilder | type[TreeBuilder] | None = None,
     parse_only: SoupStrainer | None = None,
     from_encoding: str | None = None,
-    exclude_encodings: Sequence[str] | None = None,
+    exclude_encodings: Iterable[str] | None = None,
     element_classes: dict[type[PageElement], type[Any]] | None = None,
 ) -> _ElementTree[_ET]: ...
 @overload
@@ -81,11 +118,11 @@ def parse(  # makeelement is kw
     beautifulsoup: type[BeautifulSoup] | None = None,
     *,
     makeelement: _ElementFactory[_ET],
-    features: str | Sequence[str] = "html.parser",
+    features: _Features | Collection[_Features] = "html.parser",
     builder: TreeBuilder | type[TreeBuilder] | None = None,
     parse_only: SoupStrainer | None = None,
     from_encoding: str | None = None,
-    exclude_encodings: Sequence[str] | None = None,
+    exclude_encodings: Iterable[str] | None = None,
     element_classes: dict[type[PageElement], type[Any]] | None = None,
 ) -> _ElementTree[_ET]: ...
 @overload  # makeelement not provided or is default
@@ -94,11 +131,11 @@ def parse(
     beautifulsoup: type[BeautifulSoup] | None = None,
     makeelement: None = None,
     *,
-    features: str | Sequence[str] = "html.parser",
+    features: _Features | Collection[_Features] = "html.parser",
     builder: TreeBuilder | type[TreeBuilder] | None = None,
     parse_only: SoupStrainer | None = None,
     from_encoding: str | None = None,
-    exclude_encodings: Sequence[str] | None = None,
+    exclude_encodings: Iterable[str] | None = None,
     element_classes: dict[type[PageElement], type[Any]] | None = None,
 ) -> _ElementTree[HtmlElement]: ...
 @overload