type: add lxml type-stubs

scanny · Aug 3, 2024 · 7efa08d · 7efa08d
1 parent ce10c20
commit 7efa08d
Show file tree

Hide file tree

Showing 9 changed files with 384 additions and 0 deletions.
diff --git a/typings/lxml/_types.pyi b/typings/lxml/_types.pyi
@@ -0,0 +1,38 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+from typing import Any, Callable, Collection, Mapping, Protocol, TypeVar
+
+from typing_extensions import TypeAlias
+
+from .etree import QName, _Element, _ElementTree
+
+_ET = TypeVar("_ET", bound=_Element, default=_Element)
+_ET_co = TypeVar("_ET_co", bound=_Element, default=_Element, covariant=True)
+_KT_co = TypeVar("_KT_co", covariant=True)
+_VT_co = TypeVar("_VT_co", covariant=True)
+
+_AttrName: TypeAlias = str
+
+_AttrVal: TypeAlias = _TextArg
+
+_ElemPathArg: TypeAlias = str | QName
+
+_ElementOrTree: TypeAlias = _ET | _ElementTree[_ET]
+
+_NSMapArg = Mapping[None, str] | Mapping[str, str] | Mapping[str | None, str]
+
+_NonDefaultNSMapArg = Mapping[str, str]
+
+_TagName: TypeAlias = str
+
+_TagSelector: TypeAlias = _TagName | Callable[..., _Element]
+
+# String argument also support QName in various places
+_TextArg: TypeAlias = str | bytes | QName
+
+_XPathObject = Any
+
+class SupportsLaxedItems(Protocol[_KT_co, _VT_co]):
+    """Structural type for any object whose `items()` returns a collection of key/value pairs.
+
+    Only `items()` is required, so this is looser than `Mapping`.
+    """
+
+    def items(self) -> Collection[tuple[_KT_co, _VT_co]]: ...
diff --git a/typings/lxml/etree/__init__.pyi b/typings/lxml/etree/__init__.pyi
@@ -0,0 +1,18 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+from ._classlookup import ElementBase as ElementBase
+from ._classlookup import ElementDefaultClassLookup as ElementDefaultClassLookup
+from ._cleanup import strip_elements as strip_elements
+from ._element import _Element as _Element
+from ._element import _ElementTree as _ElementTree
+from ._module_func import fromstring as fromstring
+from ._module_func import tostring as tostring
+from ._module_misc import QName as QName
+from ._nsclasses import ElementNamespaceClassLookup as ElementNamespaceClassLookup
+from ._parser import HTMLParser as HTMLParser
+from ._parser import XMLParser as XMLParser
+
+class CDATA:
+    """Wrapper around a `str`; accepted by the `_Element.text` setter in `_element.pyi`."""
+
+    def __init__(self, data: str) -> None: ...
diff --git a/typings/lxml/etree/_classlookup.pyi b/typings/lxml/etree/_classlookup.pyi
@@ -0,0 +1,75 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+from ._element import _Element
+
+class ElementBase(_Element):
+    """The public Element class
+
+    Original Docstring
+    ------------------
+    All custom Element classes must inherit from this one.
+    To create an Element, use the `Element()` factory.
+
+    BIG FAT WARNING: Subclasses *must not* override `__init__` or
+    `__new__` as it is absolutely undefined when these objects will be
+    created or destroyed.  All persistent state of Elements must be
+    stored in the underlying XML.  If you really need to initialize
+    the object after creation, you can implement an ``_init(self)``
+    method that will be called directly after object creation.
+
+    Subclasses of this class can be instantiated to create a new
+    Element.  By default, the tag name will be the class name and the
+    namespace will be empty.  You can modify this with the following
+    class attributes:
+
+    * TAG - the tag name, possibly containing a namespace in Clark
+      notation
+
+    * NAMESPACE - the default namespace URI, unless provided as part
+      of the TAG attribute.
+
+    * HTML - flag if the class is an HTML tag, as opposed to an XML
+      tag.  This only applies to un-namespaced tags and defaults to
+      false (i.e. XML).
+
+    * PARSER - the parser that provides the configuration for the
+      newly created document.  Providing an HTML parser here will
+      default to creating an HTML element.
+
+    In user code, the latter three are commonly inherited in class
+    hierarchies that implement a common namespace.
+    """
+
+    # Children are positional and loosely typed as `object`; `attrib` follows the
+    # var-positional parameter and is therefore keyword-only; any further keyword
+    # arguments must be `str` values.
+    def __init__(
+        self,
+        *children: object,
+        attrib: dict[str, str] | None = None,
+        **_extra: str,
+    ) -> None: ...
+    # Post-creation hook described in the class docstring above.
+    def _init(self) -> None: ...
+
+# Base type for the lookup classes in this package; it declares no members of its own here.
+class ElementClassLookup:
+    """Superclass of Element class lookups"""
+
+class ElementDefaultClassLookup(ElementClassLookup):
+    """Element class lookup scheme that always returns the default Element
+    class.
+
+    The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
+    accept the respective Element classes."""
+
+    # Only `element` is declared in this stub; the `comment`, `pi` and `entity`
+    # keywords mentioned in the docstring are not typed here.
+    def __init__(
+        self,
+        element: type[ElementBase] | None = None,
+    ) -> None: ...
+
+class FallbackElementClassLookup(ElementClassLookup):
+    """Superclass of Element class lookups with additional fallback"""
+
+    # Read-only property: the fallback lookup, or `None`.
+    @property
+    def fallback(self) -> ElementClassLookup | None: ...
+    # The fallback may be omitted at construction time (defaults to `None`).
+    def __init__(self, fallback: ElementClassLookup | None = None) -> None: ...
+    # Unlike `__init__`, `set_fallback` does not accept `None`.
+    def set_fallback(self, lookup: ElementClassLookup) -> None:
+        """Sets the fallback scheme for this lookup method"""
diff --git a/typings/lxml/etree/_cleanup.pyi b/typings/lxml/etree/_cleanup.pyi
@@ -0,0 +1,21 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+from typing import Collection, overload
+
+from .._types import _ElementOrTree, _TagSelector
+
+# Overload 1: tag selectors are passed as separate positional arguments.
+# The double-underscore prefix marks `__tree_or_elem` as positional-only.
+@overload
+def strip_elements(
+    __tree_or_elem: _ElementOrTree,
+    *tag_names: _TagSelector,
+    with_tail: bool = True,
+) -> None: ...
+# Overload 2: tag selectors are passed as one collection; both leading
+# parameters are positional-only (explicit `/`).
+@overload
+def strip_elements(
+    __tree_or_elem: _ElementOrTree,
+    __tag: Collection[_TagSelector],
+    /,
+    with_tail: bool = True,
+) -> None: ...
diff --git a/typings/lxml/etree/_element.pyi b/typings/lxml/etree/_element.pyi
@@ -0,0 +1,96 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+from typing import Collection, Generic, Iterable, Iterator, TypeVar, overload
+
+from typing_extensions import Self
+
+from .. import _types as _t
+from . import CDATA
+
+_T = TypeVar("_T")
+
+# Behaves like MutableMapping but deviates a lot in details
+class _Attrib:
+    def __bool__(self) -> bool: ...
+    def __contains__(self, __o: object) -> bool: ...
+    def __delitem__(self, __k: _t._AttrName) -> None: ...
+    def __getitem__(self, __k: _t._AttrName) -> str: ...
+    def __iter__(self) -> Iterator[str]: ...
+    def __len__(self) -> int: ...
+    def __setitem__(self, __k: _t._AttrName, __v: _t._AttrVal) -> None: ...
+    @property
+    def _element(self) -> _Element: ...
+    def get(self, key: _t._AttrName, default: _T) -> str | _T: ...
+    def has_key(self, key: _t._AttrName) -> bool: ...
+    def items(self) -> list[tuple[str, str]]: ...
+    def iteritems(self) -> Iterator[tuple[str, str]]: ...
+    def iterkeys(self) -> Iterator[str]: ...
+    def itervalues(self) -> Iterator[str]: ...
+    def keys(self) -> list[str]: ...
+    def values(self) -> list[str]: ...
+
+class _Element:
+    @overload
+    def __getitem__(self, __x: int) -> _Element: ...
+    @overload
+    def __getitem__(self, __x: slice) -> list[_Element]: ...
+    def __contains__(self, __o: object) -> bool: ...
+    def __len__(self) -> int: ...
+    def __iter__(self) -> Iterator[_Element]: ...
+    def addprevious(self, element: _Element) -> None: ...
+    def append(self, element: _Element) -> None: ...
+    @property
+    def attrib(self) -> _Attrib: ...
+    def find(self, path: _t._ElemPathArg) -> Self | None: ...
+    def findall(
+        self, path: _t._ElemPathArg, namespaces: _t._NSMapArg | None = None
+    ) -> list[_Element]: ...
+    @overload
+    def get(self, key: _t._AttrName) -> str | None: ...
+    @overload
+    def get(self, key: _t._AttrName, default: _T) -> str | _T: ...
+    def getparent(self) -> _Element | None: ...
+    def index(self, child: _Element, start: int | None = None, end: int | None = None) -> int: ...
+    def iterancestors(
+        self, *, tag: _t._TagSelector | Collection[_t._TagSelector] | None = None
+    ) -> Iterator[Self]: ...
+    @overload
+    def iterchildren(
+        self, *tags: _t._TagSelector, reversed: bool = False
+    ) -> Iterator[_Element]: ...
+    @overload
+    def iterchildren(
+        self,
+        *,
+        tag: _t._TagSelector | Iterable[_t._TagSelector] | None = None,
+        reversed: bool = False,
+    ) -> Iterator[_Element]: ...
+    @overload
+    def itertext(self, *tags: _t._TagSelector, with_tail: bool = True) -> Iterator[str]: ...
+    @overload
+    def itertext(
+        self,
+        *,
+        tag: _t._TagSelector | Collection[_t._TagSelector] | None = None,
+        with_tail: bool = True,
+    ) -> Iterator[str]: ...
+    def remove(self, element: _Element) -> None: ...
+    def set(self, key: _t._AttrName, value: _t._AttrVal) -> None: ...
+    @property
+    def tag(self) -> str: ...
+    @property
+    def tail(self) -> str | None: ...
+    @property
+    def text(self) -> str | None: ...
+    @text.setter
+    def text(self, value: str | CDATA | None) -> None: ...
+    def xpath(
+        self,
+        _path: str,
+        /,
+        namespaces: _t._NonDefaultNSMapArg | None = None,
+    ) -> _t._XPathObject: ...
+
+# Generic over the element type (`_ET_co`, which defaults to `_Element`);
+# no members are declared in this stub.
+class _ElementTree(Generic[_t._ET_co]): ...
diff --git a/typings/lxml/etree/_module_func.pyi b/typings/lxml/etree/_module_func.pyi
@@ -0,0 +1,19 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+from .._types import _ElementOrTree
+from ..etree import HTMLParser, XMLParser
+from ._element import _Element
+
+# Takes `text` (str or bytes) plus an explicit parser and yields an `_Element`.
+# Note that `parser` has no default in this stub, so it must always be passed.
+def fromstring(text: str | bytes, parser: XMLParser | HTMLParser) -> _Element: ...
+
+# Under XML Canonicalization (C14N) mode, most arguments are ignored,
+# some arguments would even raise exception outright if specified.
+#
+# All options after `element_or_tree` are keyword-only.
+#
+# NOTE(review): the return type is declared as `str` for every `encoding`,
+# including the default `None`. lxml presumably returns `bytes` unless
+# `encoding` is `str` or "unicode" -- confirm and consider overloads.
+def tostring(
+    element_or_tree: _ElementOrTree,
+    *,
+    encoding: str | type[str] | None = None,
+    pretty_print: bool = False,
+    with_tail: bool = True,
+) -> str: ...
diff --git a/typings/lxml/etree/_module_misc.pyi b/typings/lxml/etree/_module_misc.pyi
@@ -0,0 +1,5 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+# Opaque placeholder: no members are declared. It is referenced by the
+# `_ElemPathArg` and `_TextArg` aliases in `_types.pyi`.
+class QName: ...
diff --git a/typings/lxml/etree/_nsclasses.pyi b/typings/lxml/etree/_nsclasses.pyi
@@ -0,0 +1,31 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+from typing import Iterable, Iterator, MutableMapping, TypeVar
+
+from .._types import SupportsLaxedItems
+from ._classlookup import ElementBase, ElementClassLookup, FallbackElementClassLookup
+
+_KT = TypeVar("_KT")
+_VT = TypeVar("_VT")
+
+class _NamespaceRegistry(MutableMapping[_KT, _VT]):
+    """Generic mutable-mapping stub for a registry keyed by `_KT` with `_VT` values.
+
+    `update()` and `items()` deliberately deviate from the `MutableMapping`
+    signatures, hence the `type: ignore[override]` markers.
+    """
+
+    def __delitem__(self, __key: _KT) -> None: ...
+    def __getitem__(self, __key: _KT) -> _VT: ...
+    def __setitem__(self, __key: _KT, __value: _VT) -> None: ...
+    def __iter__(self) -> Iterator[_KT]: ...
+    def __len__(self) -> int: ...
+    # Takes exactly one argument: an object exposing `items()` or an iterable of pairs.
+    def update(  # type: ignore[override]
+        self,
+        class_dict_iterable: SupportsLaxedItems[_KT, _VT] | Iterable[tuple[_KT, _VT]],
+    ) -> None: ...
+    # Returns a list rather than an `ItemsView`; `iteritems()` is the iterator form.
+    def items(self) -> list[tuple[_KT, _VT]]: ...  # type: ignore[override]
+    def iteritems(self) -> Iterator[tuple[_KT, _VT]]: ...
+    def clear(self) -> None: ...
+
+# Registry specialized to `str | None` keys and `ElementBase` subclass values.
+class _ClassNamespaceRegistry(_NamespaceRegistry[str | None, type[ElementBase]]): ...
+
+class ElementNamespaceClassLookup(FallbackElementClassLookup):
+    # Same constructor signature as the `FallbackElementClassLookup` base.
+    def __init__(self, fallback: ElementClassLookup | None = None) -> None: ...
+    # Takes a namespace URI (or `None`) and returns a `_ClassNamespaceRegistry`.
+    def get_namespace(self, ns_uri: str | None) -> _ClassNamespaceRegistry: ...
diff --git a/typings/lxml/etree/_parser.pyi b/typings/lxml/etree/_parser.pyi
@@ -0,0 +1,81 @@
+# pyright: reportPrivateUsage=false
+
+from __future__ import annotations
+
+from typing import Literal
+
+from ._classlookup import ElementClassLookup
+from .._types import _ET_co, _NSMapArg, _TagName, SupportsLaxedItems
+
+class HTMLParser:
+    """Type stub for `lxml.etree.HTMLParser`; every constructor option is keyword-only."""
+
+    def __init__(
+        self,
+        *,
+        encoding: str | None = None,
+        remove_blank_text: bool = False,
+        remove_comments: bool = False,
+        remove_pis: bool = False,
+        strip_cdata: bool = True,
+        no_network: bool = True,
+        recover: bool = True,
+        compact: bool = True,
+        default_doctype: bool = True,
+        collect_ids: bool = True,
+        huge_tree: bool = False,
+    ) -> None: ...
+    # `lookup` may be omitted or `None`.
+    def set_element_class_lookup(self, lookup: ElementClassLookup | None = None) -> None: ...
+
+class XMLParser:
+    """Type stub for `lxml.etree.XMLParser`; every constructor option is keyword-only."""
+
+    def __init__(
+        self,
+        *,
+        attribute_defaults: bool = False,
+        collect_ids: bool = True,
+        compact: bool = True,
+        dtd_validation: bool = False,
+        encoding: str | None = None,
+        huge_tree: bool = False,
+        load_dtd: bool = False,
+        no_network: bool = True,
+        ns_clean: bool = False,
+        recover: bool = False,
+        remove_blank_text: bool = False,
+        remove_comments: bool = False,
+        remove_pis: bool = False,
+        resolve_entities: bool | Literal["internal"] = "internal",
+        strip_cdata: bool = True,
+    ) -> None: ...
+    # `_tag` is positional-only; extra keyword arguments must be `str` values.
+    # NOTE(review): `_ET_co` is not bound by this (non-generic) class, so it
+    # appears only in the return type -- confirm whether `XMLParser` was meant
+    # to be `Generic[_ET_co]`.
+    def makeelement(
+        self,
+        _tag: _TagName,
+        /,
+        attrib: SupportsLaxedItems[str, str] | None = None,
+        nsmap: _NSMapArg | None = None,
+        **_extra: str,
+    ) -> _ET_co: ...
+    def set_element_class_lookup(self, lookup: ElementClassLookup | None = None) -> None:
+        """
+        Notes
+        -----
+        When calling this method, it is advised to also change typing
+        specialization of concerned parser too, because current python
+        typing system can't change it automatically.
+
+        Example
+        -------
+        Following code demonstrates how to create ``lxml.html.HTMLParser``
+        manually from ``lxml.etree.HTMLParser``:
+
+        ```python
+        parser = etree.HTMLParser()
+        reveal_type(parser)  # HTMLParser[_Element]
+        if TYPE_CHECKING:
+            parser = cast('etree.HTMLParser[HtmlElement]', parser)
+        else:
+            parser.set_element_class_lookup(
+                html.HtmlElementClassLookup())
+        result = etree.fromstring(data, parser=parser)
+        reveal_type(result)  # HtmlElement
+        ```
+        """
+        ...