Skip to content

Commit

Permalink
fix: Limit str usage in beautifulsoup kw arguments
Browse files Browse the repository at this point in the history
- Use warnings.deprecated to block str for excluded_encodings arg
- Exhaustively list possible Literal constants in features arg
  • Loading branch information
abelcheung committed Oct 22, 2024
1 parent bc8798b commit 68facbc
Showing 1 changed file with 55 additions and 18 deletions.
73 changes: 55 additions & 18 deletions lxml-stubs/html/soupparser.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Any, Sequence, overload
import sys
from typing import Any, Collection, Iterable, Literal, overload

from _typeshed import SupportsRead
from bs4 import BeautifulSoup, PageElement, SoupStrainer
Expand All @@ -8,29 +9,58 @@ from .._types import _ET, _AnyStr, _ElementFactory
from ..etree import _ElementTree
from . import HtmlElement

if sys.version_info >= (3, 11):
from typing import Never
else:
from typing_extensions import Never

if sys.version_info >= (3, 13):
from warnings import deprecated
else:
from typing_extensions import deprecated

# Closed set of string constants accepted by the 'features' keyword
# argument of fromstring() and parse(); anything outside this list is
# rejected by the type checker.
_Features = Literal[
    "fast",
    "permissive",
    "strict",
    "xml",
    "html",
    "html5",
    "html5lib",
    "html.parser",
    "lxml-xml",
    "lxml",
    "lxml-html",
]

# NOTES:
# - Keyword-only arguments for fromstring() and parse() are
#   taken from types-beautifulsoup4.
# - Strictly speaking, the annotation for the 'features' argument
#   should have been
#
#       features: str | Sequence[str] | None = None
#
#   but the current modification is much more helpful for users.
# - The default value for the 'features' argument should have been None,
#   but the current modification is much more helpful for code
#   writers; they don't need to look up the source to see how lxml behaves.
# - The makeelement argument provides a very exotic feature:
#   it's actually possible to convert a BeautifulSoup HTML tree
#   into an lxml XML element tree, not just an lxml HTML tree.

# Guard overload: a bare ``str`` is itself an ``Iterable[str]``, so without
# this overload a call such as ``exclude_encodings="utf-8"`` would silently
# match the real overloads. Matching it here first lets the type checker
# report the call as deprecated and infer ``Never`` as its result.
@overload  # guard against plain string in exclude_encodings
@deprecated("Use a collection or iterable of encodings, not a single encoding string")
def fromstring(
    *args: Any,
    exclude_encodings: str,
    **kw: Any,
) -> Never: ...
# Overload chosen when a custom element factory is passed positionally as
# the third argument; the result type is whatever element type (_ET) that
# factory produces. Everything after ``*`` is keyword-only.
@overload  # makeelement is positional
def fromstring(
    data: _AnyStr | SupportsRead[str] | SupportsRead[bytes],
    beautifulsoup: type[BeautifulSoup] | None,
    makeelement: _ElementFactory[_ET],
    *,
    # Restricted to the known literal names (or a collection of them)
    features: _Features | Collection[_Features] = "html.parser",
    builder: TreeBuilder | type[TreeBuilder] | None = None,
    parse_only: SoupStrainer | None = None,
    from_encoding: str | None = None,
    # A bare str is caught by the deprecated guard overload
    exclude_encodings: Iterable[str] | None = None,
    element_classes: dict[type[PageElement], type[Any]] | None = None,
) -> _ET: ...
@overload # makeelement is kw
Expand All @@ -39,11 +69,11 @@ def fromstring(
beautifulsoup: type[BeautifulSoup] | None = None,
*,
makeelement: _ElementFactory[_ET],
features: str | Sequence[str] = "html.parser",
features: _Features | Collection[_Features] = "html.parser",
builder: TreeBuilder | type[TreeBuilder] | None = None,
parse_only: SoupStrainer | None = None,
from_encoding: str | None = None,
exclude_encodings: Sequence[str] | None = None,
exclude_encodings: Iterable[str] | None = None,
element_classes: dict[type[PageElement], type[Any]] | None = None,
) -> _ET: ...
@overload # makeelement not provided or is default
Expand All @@ -52,27 +82,34 @@ def fromstring(
beautifulsoup: type[BeautifulSoup] | None = None,
makeelement: None = None,
*,
features: str | Sequence[str] = "html.parser",
features: _Features | Collection[_Features] = "html.parser",
builder: TreeBuilder | type[TreeBuilder] | None = None,
parse_only: SoupStrainer | None = None,
from_encoding: str | None = None,
exclude_encodings: Sequence[str] | None = None,
exclude_encodings: Iterable[str] | None = None,
element_classes: dict[type[PageElement], type[Any]] | None = None,
) -> HtmlElement: ...

# Technically, Path is also accepted for the parse() file argument,
# but passing one emits a visible warning
# Guard overload: a bare ``str`` is itself an ``Iterable[str]``, so without
# this overload a call such as ``exclude_encodings="utf-8"`` would silently
# match the real overloads. Matching it here first lets the type checker
# report the call as deprecated and infer ``Never`` as its result.
@overload  # guard against plain string in exclude_encodings
@deprecated("Use a collection or iterable of encodings, not a single encoding string")
def parse(
    *args: Any,
    exclude_encodings: str,
    **kw: Any,
) -> Never: ...
# Overload chosen when a custom element factory is passed positionally as
# the third argument; the resulting tree is parameterised by the element
# type (_ET) that factory produces. Everything after ``*`` is keyword-only.
@overload  # makeelement is positional
def parse(
    file: _AnyStr | SupportsRead[str] | SupportsRead[bytes],
    beautifulsoup: type[BeautifulSoup] | None,
    makeelement: _ElementFactory[_ET],
    *,
    # Restricted to the known literal names (or a collection of them)
    features: _Features | Collection[_Features] = "html.parser",
    builder: TreeBuilder | type[TreeBuilder] | None = None,
    parse_only: SoupStrainer | None = None,
    from_encoding: str | None = None,
    # A bare str is caught by the deprecated guard overload
    exclude_encodings: Iterable[str] | None = None,
    element_classes: dict[type[PageElement], type[Any]] | None = None,
) -> _ElementTree[_ET]: ...
@overload
Expand All @@ -81,11 +118,11 @@ def parse( # makeelement is kw
beautifulsoup: type[BeautifulSoup] | None = None,
*,
makeelement: _ElementFactory[_ET],
features: str | Sequence[str] = "html.parser",
features: _Features | Collection[_Features] = "html.parser",
builder: TreeBuilder | type[TreeBuilder] | None = None,
parse_only: SoupStrainer | None = None,
from_encoding: str | None = None,
exclude_encodings: Sequence[str] | None = None,
exclude_encodings: Iterable[str] | None = None,
element_classes: dict[type[PageElement], type[Any]] | None = None,
) -> _ElementTree[_ET]: ...
@overload # makeelement not provided or is default
Expand All @@ -94,11 +131,11 @@ def parse(
beautifulsoup: type[BeautifulSoup] | None = None,
makeelement: None = None,
*,
features: str | Sequence[str] = "html.parser",
features: _Features | Collection[_Features] = "html.parser",
builder: TreeBuilder | type[TreeBuilder] | None = None,
parse_only: SoupStrainer | None = None,
from_encoding: str | None = None,
exclude_encodings: Sequence[str] | None = None,
exclude_encodings: Iterable[str] | None = None,
element_classes: dict[type[PageElement], type[Any]] | None = None,
) -> _ElementTree[HtmlElement]: ...
@overload
Expand Down

0 comments on commit 68facbc

Please sign in to comment.