From 68facbcc2c87a9259891a4b27bc8701116aea807 Mon Sep 17 00:00:00 2001 From: Abel Cheung Date: Tue, 22 Oct 2024 08:25:51 +0000 Subject: [PATCH] fix: Limit str usage in beautifulsoup kw arguments - Use warnings.deprecated to block str for exclude_encodings arg - Exhaustively list possible Literal constants in features arg --- lxml-stubs/html/soupparser.pyi | 73 +++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/lxml-stubs/html/soupparser.pyi b/lxml-stubs/html/soupparser.pyi index 94630b7..273b9e7 100644 --- a/lxml-stubs/html/soupparser.pyi +++ b/lxml-stubs/html/soupparser.pyi @@ -1,4 +1,5 @@ -from typing import Any, Sequence, overload +import sys +from typing import Any, Collection, Iterable, Literal, overload from _typeshed import SupportsRead from bs4 import BeautifulSoup, PageElement, SoupStrainer @@ -8,29 +9,58 @@ from .._types import _ET, _AnyStr, _ElementFactory from ..etree import _ElementTree from . import HtmlElement +if sys.version_info >= (3, 11): + from typing import Never +else: + from typing_extensions import Never + +if sys.version_info >= (3, 13): + from warnings import deprecated +else: + from typing_extensions import deprecated + +_Features = Literal[ + "fast", + "permissive", + "strict", + "xml", + "html", + "html5", + "html5lib", + "html.parser", + "lxml-xml", + "lxml", + "lxml-html", +] + # NOTES: # - kw only arguments for fromstring() and parse() are # taken from types-beautifulsoup4 -# - annotation for 'features' argument should have been -# -# features: str | Sequence[str] | None = None -# -# but current modification is much more helpful for users +# - Default value for 'features' argument should have been None, +# but current modification is much more helpful for code +# writers; they don't need to look up source on how lxml behaves # - makeelement argument provides very exotic feature: # it's actually possible to convert BeautifulSoup html tree # into lxml XML element tree, not just lxml html
tree +@overload # guard against plain string in exclude_encodings +@deprecated("Use a collection of encoding, not a vanilla encoding string") +def fromstring( + *args: Any, + exclude_encodings: str, + **kw: Any, +) -> Never: ... @overload # makeelement is positional def fromstring( data: _AnyStr | SupportsRead[str] | SupportsRead[bytes], beautifulsoup: type[BeautifulSoup] | None, makeelement: _ElementFactory[_ET], *, - features: str | Sequence[str] = "html.parser", + features: _Features | Collection[_Features] = "html.parser", builder: TreeBuilder | type[TreeBuilder] | None = None, parse_only: SoupStrainer | None = None, from_encoding: str | None = None, - exclude_encodings: Sequence[str] | None = None, + exclude_encodings: Iterable[str] | None = None, element_classes: dict[type[PageElement], type[Any]] | None = None, ) -> _ET: ... @overload # makeelement is kw @@ -39,11 +69,11 @@ def fromstring( beautifulsoup: type[BeautifulSoup] | None = None, *, makeelement: _ElementFactory[_ET], - features: str | Sequence[str] = "html.parser", + features: _Features | Collection[_Features] = "html.parser", builder: TreeBuilder | type[TreeBuilder] | None = None, parse_only: SoupStrainer | None = None, from_encoding: str | None = None, - exclude_encodings: Sequence[str] | None = None, + exclude_encodings: Iterable[str] | None = None, element_classes: dict[type[PageElement], type[Any]] | None = None, ) -> _ET: ... 
@overload # makeelement not provided or is default @@ -52,27 +82,34 @@ def fromstring( beautifulsoup: type[BeautifulSoup] | None = None, makeelement: None = None, *, - features: str | Sequence[str] = "html.parser", + features: _Features | Collection[_Features] = "html.parser", builder: TreeBuilder | type[TreeBuilder] | None = None, parse_only: SoupStrainer | None = None, from_encoding: str | None = None, - exclude_encodings: Sequence[str] | None = None, + exclude_encodings: Iterable[str] | None = None, element_classes: dict[type[PageElement], type[Any]] | None = None, ) -> HtmlElement: ... # Technically Path is also accepted for parse() file argument # but emits visible warning +@overload # guard against plain string in exclude_encodings +@deprecated("Use encoding collection or iterator, not a vanilla encoding string") +def parse( + *args: Any, + exclude_encodings: str, + **kw: Any, +) -> Never: ... @overload # makeelement is positional def parse( file: _AnyStr | SupportsRead[str] | SupportsRead[bytes], beautifulsoup: type[BeautifulSoup] | None, makeelement: _ElementFactory[_ET], *, - features: str | Sequence[str] = "html.parser", + features: _Features | Collection[_Features] = "html.parser", builder: TreeBuilder | type[TreeBuilder] | None = None, parse_only: SoupStrainer | None = None, from_encoding: str | None = None, - exclude_encodings: Sequence[str] | None = None, + exclude_encodings: Iterable[str] | None = None, element_classes: dict[type[PageElement], type[Any]] | None = None, ) -> _ElementTree[_ET]: ... 
@overload @@ -81,11 +118,11 @@ def parse( # makeelement is kw beautifulsoup: type[BeautifulSoup] | None = None, *, makeelement: _ElementFactory[_ET], - features: str | Sequence[str] = "html.parser", + features: _Features | Collection[_Features] = "html.parser", builder: TreeBuilder | type[TreeBuilder] | None = None, parse_only: SoupStrainer | None = None, from_encoding: str | None = None, - exclude_encodings: Sequence[str] | None = None, + exclude_encodings: Iterable[str] | None = None, element_classes: dict[type[PageElement], type[Any]] | None = None, ) -> _ElementTree[_ET]: ... @overload # makeelement not provided or is default @@ -94,11 +131,11 @@ def parse( beautifulsoup: type[BeautifulSoup] | None = None, makeelement: None = None, *, - features: str | Sequence[str] = "html.parser", + features: _Features | Collection[_Features] = "html.parser", builder: TreeBuilder | type[TreeBuilder] | None = None, parse_only: SoupStrainer | None = None, from_encoding: str | None = None, - exclude_encodings: Sequence[str] | None = None, + exclude_encodings: Iterable[str] | None = None, element_classes: dict[type[PageElement], type[Any]] | None = None, ) -> _ElementTree[HtmlElement]: ... @overload