Skip to content

Commit

Permalink
type: add lxml type-stubs
Browse files Browse the repository at this point in the history
  • Loading branch information
scanny committed Aug 3, 2024
1 parent ce10c20 commit 7efa08d
Show file tree
Hide file tree
Showing 9 changed files with 384 additions and 0 deletions.
38 changes: 38 additions & 0 deletions typings/lxml/_types.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

from typing import Any, Callable, Collection, Mapping, Protocol, TypeVar

from typing_extensions import TypeAlias

from .etree import QName, _Element, _ElementTree

# Element type variables; both are bound to, and default to, ``_Element``.
_ET = TypeVar("_ET", bound=_Element, default=_Element)
_ET_co = TypeVar("_ET_co", bound=_Element, default=_Element, covariant=True)

# Covariant key/value variables used to parameterize ``SupportsLaxedItems``.
_KT_co = TypeVar("_KT_co", covariant=True)
_VT_co = TypeVar("_VT_co", covariant=True)

_AttrName: TypeAlias = str

# Forward reference to ``_TextArg`` (declared below); allowed in a stub file.
_AttrVal: TypeAlias = _TextArg

_ElemPathArg: TypeAlias = str | QName

_ElementOrTree: TypeAlias = _ET | _ElementTree[_ET]

# Namespace maps may use ``None`` as the key for the default namespace.
_NSMapArg: TypeAlias = Mapping[None, str] | Mapping[str, str] | Mapping[str | None, str]

# Namespace map without a ``None`` key (used by ``_Element.xpath``).
_NonDefaultNSMapArg: TypeAlias = Mapping[str, str]

_TagName: TypeAlias = str

_TagSelector: TypeAlias = _TagName | Callable[..., _Element]

# String arguments also support QName in various places.
_TextArg: TypeAlias = str | bytes | QName

# XPath results are left untyped.
_XPathObject: TypeAlias = Any

class SupportsLaxedItems(Protocol[_KT_co, _VT_co]):
    """Structural type for any object that exposes an ``items()`` method.

    The only requirement is that ``items()`` returns a ``Collection`` of
    ``(key, value)`` tuples.
    """

    def items(self) -> Collection[tuple[_KT_co, _VT_co]]:
        """Return the ``(key, value)`` pairs of the object."""
        ...
18 changes: 18 additions & 0 deletions typings/lxml/etree/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

from ._classlookup import ElementBase as ElementBase
from ._classlookup import ElementDefaultClassLookup as ElementDefaultClassLookup
from ._cleanup import strip_elements as strip_elements
from ._element import _Element as _Element
from ._element import _ElementTree as _ElementTree
from ._module_func import fromstring as fromstring
from ._module_func import tostring as tostring
from ._module_misc import QName as QName
from ._nsclasses import ElementNamespaceClassLookup as ElementNamespaceClassLookup
from ._parser import HTMLParser as HTMLParser
from ._parser import XMLParser as XMLParser

class CDATA:
    """Stub for ``lxml.etree.CDATA``; constructed from a single string."""

    def __init__(self, data: str) -> None:
        """Create the object from the string *data*."""
        ...
75 changes: 75 additions & 0 deletions typings/lxml/etree/_classlookup.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

from ._element import _Element

class ElementBase(_Element):
    """The public Element class

    Original Docstring
    ------------------
    All custom Element classes must inherit from this one.
    To create an Element, use the `Element()` factory.

    BIG FAT WARNING: Subclasses *must not* override `__init__` or
    `__new__` as it is absolutely undefined when these objects will be
    created or destroyed. All persistent state of Elements must be
    stored in the underlying XML. If you really need to initialize
    the object after creation, you can implement an ``_init(self)``
    method that will be called directly after object creation.

    Subclasses of this class can be instantiated to create a new
    Element. By default, the tag name will be the class name and the
    namespace will be empty. You can modify this with the following
    class attributes:

    * TAG - the tag name, possibly containing a namespace in Clark
      notation

    * NAMESPACE - the default namespace URI, unless provided as part
      of the TAG attribute.

    * HTML - flag if the class is an HTML tag, as opposed to an XML
      tag. This only applies to un-namespaced tags and defaults to
      false (i.e. XML).

    * PARSER - the parser that provides the configuration for the
      newly created document. Providing an HTML parser here will
      default to creating an HTML element.

    In user code, the latter three are commonly inherited in class
    hierarchies that implement a common namespace.
    """

    def __init__(
        self,
        *children: object,
        attrib: dict[str, str] | None = None,
        **_extra: str,
    ) -> None:
        """Declare the constructor signature: children, `attrib`, extra string keywords."""
        ...

    def _init(self) -> None:
        """Post-creation hook; see the class docstring for when to implement it."""
        ...

class ElementClassLookup:
    """Superclass of Element class lookups.

    The stub declares no members; the other lookup classes in this module
    derive from it.
    """

class ElementDefaultClassLookup(ElementClassLookup):
    """Element class lookup scheme that always returns the default Element
    class.

    The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
    accept the respective Element classes.
    """

    # NOTE(review): only ``element`` is declared below; ``comment``, ``pi`` and
    # ``entity`` from the docstring are not part of this stub -- confirm intended.
    def __init__(
        self,
        element: type[ElementBase] | None = None,
    ) -> None:
        """Declare the constructor: an optional ``ElementBase`` subclass."""
        ...

class FallbackElementClassLookup(ElementClassLookup):
    """Superclass of Element class lookups with additional fallback"""

    def __init__(self, fallback: ElementClassLookup | None = None) -> None:
        """Declare the constructor: an optional fallback lookup."""
        ...

    @property
    def fallback(self) -> ElementClassLookup | None:
        """The fallback lookup, or ``None``."""
        ...

    def set_fallback(self, lookup: ElementClassLookup) -> None:
        """Sets the fallback scheme for this lookup method"""
        ...
21 changes: 21 additions & 0 deletions typings/lxml/etree/_cleanup.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

from typing import Collection, overload

from .._types import _ElementOrTree, _TagSelector

# Form 1: tag selectors are given as separate positional arguments.
@overload
def strip_elements(
    __tree_or_elem: _ElementOrTree,
    *tag_names: _TagSelector,
    with_tail: bool = True,
) -> None:
    """Stub for ``strip_elements(tree_or_element, *tag_names, with_tail=True)``."""
    ...

# Form 2: tag selectors are given as one collection (second positional argument).
@overload
def strip_elements(
    __tree_or_elem: _ElementOrTree,
    __tag: Collection[_TagSelector],
    /,
    with_tail: bool = True,
) -> None:
    """Stub for ``strip_elements(tree_or_element, tags, with_tail=True)``."""
    ...
96 changes: 96 additions & 0 deletions typings/lxml/etree/_element.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

from typing import Collection, Generic, Iterable, Iterator, TypeVar, overload

from typing_extensions import Self

from .. import _types as _t
from . import CDATA

_T = TypeVar("_T")

# Behaves like MutableMapping but deviates a lot in details
class _Attrib:
    """Stub for the attribute mapping returned by ``_Element.attrib``.

    Keys are attribute names and values read back as ``str``; assignment
    accepts any ``_AttrVal``.
    """

    def __bool__(self) -> bool: ...
    def __contains__(self, __o: object) -> bool: ...
    def __delitem__(self, __k: _t._AttrName) -> None: ...
    def __getitem__(self, __k: _t._AttrName) -> str: ...
    def __iter__(self) -> Iterator[str]: ...
    def __len__(self) -> int: ...
    def __setitem__(self, __k: _t._AttrName, __v: _t._AttrVal) -> None: ...
    @property
    def _element(self) -> _Element: ...

    # Two overloads, mirroring ``_Element.get``: with no ``default`` the
    # result is ``str | None``; with one, the default's type is carried
    # through to the result.
    @overload
    def get(self, key: _t._AttrName) -> str | None: ...
    @overload
    def get(self, key: _t._AttrName, default: _T) -> str | _T: ...
    def has_key(self, key: _t._AttrName) -> bool: ...
    def items(self) -> list[tuple[str, str]]: ...
    def iteritems(self) -> Iterator[tuple[str, str]]: ...
    def iterkeys(self) -> Iterator[str]: ...
    def itervalues(self) -> Iterator[str]: ...
    def keys(self) -> list[str]: ...
    def values(self) -> list[str]: ...

class _Element:
    """Stub for the lxml element class, limited to the members declared here.

    Covers child access (sequence protocol), attribute access, tree
    navigation, text content and XPath evaluation.
    """

    # --- sequence protocol over child elements ---
    @overload
    def __getitem__(self, __x: int) -> _Element: ...
    @overload
    def __getitem__(self, __x: slice) -> list[_Element]: ...
    def __contains__(self, __o: object) -> bool: ...
    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[_Element]: ...

    # --- tree mutation ---
    def addprevious(self, element: _Element) -> None: ...
    def append(self, element: _Element) -> None: ...

    @property
    def attrib(self) -> _Attrib: ...

    # --- searching ---
    # ``namespaces`` is optional, matching ``findall`` below.
    def find(
        self, path: _t._ElemPathArg, namespaces: _t._NSMapArg | None = None
    ) -> Self | None: ...
    def findall(
        self, path: _t._ElemPathArg, namespaces: _t._NSMapArg | None = None
    ) -> list[_Element]: ...

    # --- attribute lookup: without a default the result may be ``None`` ---
    @overload
    def get(self, key: _t._AttrName) -> str | None: ...
    @overload
    def get(self, key: _t._AttrName, default: _T) -> str | _T: ...

    # --- navigation ---
    def getparent(self) -> _Element | None: ...
    def index(self, child: _Element, start: int | None = None, end: int | None = None) -> int: ...
    def iterancestors(
        self, *, tag: _t._TagSelector | Collection[_t._TagSelector] | None = None
    ) -> Iterator[Self]: ...

    # Tag filters are given either positionally (``*tags``) or via ``tag=``.
    @overload
    def iterchildren(
        self, *tags: _t._TagSelector, reversed: bool = False
    ) -> Iterator[_Element]: ...
    @overload
    def iterchildren(
        self,
        *,
        tag: _t._TagSelector | Iterable[_t._TagSelector] | None = None,
        reversed: bool = False,
    ) -> Iterator[_Element]: ...
    @overload
    def itertext(self, *tags: _t._TagSelector, with_tail: bool = True) -> Iterator[str]: ...
    @overload
    def itertext(
        self,
        *,
        tag: _t._TagSelector | Collection[_t._TagSelector] | None = None,
        with_tail: bool = True,
    ) -> Iterator[str]: ...

    def remove(self, element: _Element) -> None: ...
    def set(self, key: _t._AttrName, value: _t._AttrVal) -> None: ...

    # --- tag and text content; only ``text`` declares a setter ---
    @property
    def tag(self) -> str: ...
    @property
    def tail(self) -> str | None: ...
    @property
    def text(self) -> str | None: ...
    @text.setter
    def text(self, value: str | CDATA | None) -> None: ...

    # The result is ``_XPathObject`` (an alias of ``Any``).
    def xpath(
        self,
        _path: str,
        /,
        namespaces: _t._NonDefaultNSMapArg | None = None,
    ) -> _t._XPathObject: ...

class _ElementTree(Generic[_t._ET_co]):
    """Stub for the lxml element tree, generic over an element type.

    No members are declared in this stub.
    """
19 changes: 19 additions & 0 deletions typings/lxml/etree/_module_func.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

from .._types import _ElementOrTree
from ..etree import HTMLParser, XMLParser
from ._element import _Element

def fromstring(
    text: str | bytes,
    parser: XMLParser | HTMLParser | None = None,
) -> _Element:
    """Stub for ``lxml.etree.fromstring``.

    Parameters
    ----------
    text
        The document source, as ``str`` or ``bytes``.
    parser
        Optional parser to use. It defaults to ``None``, matching the lxml
        signature ``fromstring(text, parser=None, ...)``.

    Returns
    -------
    _Element
        Declared return type of the call.
    """
    ...

# Under XML Canonicalization (C14N) mode, most arguments are ignored,
# some arguments would even raise exception outright if specified.
def tostring(
    element_or_tree: _ElementOrTree,
    *,
    encoding: str | type[str] | None = None,
    pretty_print: bool = False,
    with_tail: bool = True,
) -> str:
    """Stub for ``lxml.etree.tostring``; every option is keyword-only.

    NOTE(review): the declared return type is always ``str``. lxml presumably
    returns ``bytes`` unless ``encoding`` is ``str`` or ``"unicode"`` --
    confirm that all callers pass a unicode encoding.
    """
    ...
5 changes: 5 additions & 0 deletions typings/lxml/etree/_module_misc.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

class QName:
    """Stub for ``lxml.etree.QName``; no members are declared here."""
31 changes: 31 additions & 0 deletions typings/lxml/etree/_nsclasses.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

from typing import Iterable, Iterator, MutableMapping, TypeVar

from .._types import SupportsLaxedItems
from ._classlookup import ElementBase, ElementClassLookup, FallbackElementClassLookup

_KT = TypeVar("_KT")
_VT = TypeVar("_VT")

class _NamespaceRegistry(MutableMapping[_KT, _VT]):
    """Mutable mapping stub used as the base for namespace registries.

    ``update`` and ``items`` deliberately deviate from the
    ``MutableMapping`` signatures, hence the ``type: ignore`` markers.
    """

    # --- core mapping protocol ---
    def __getitem__(self, __key: _KT) -> _VT: ...
    def __setitem__(self, __key: _KT, __value: _VT) -> None: ...
    def __delitem__(self, __key: _KT) -> None: ...
    def __iter__(self) -> Iterator[_KT]: ...
    def __len__(self) -> int: ...

    # Takes one argument: an object with ``items()`` or an iterable of pairs.
    def update(  # type: ignore[override]
        self,
        class_dict_iterable: SupportsLaxedItems[_KT, _VT] | Iterable[tuple[_KT, _VT]],
    ) -> None: ...

    # Returns a list rather than an items view.
    def items(self) -> list[tuple[_KT, _VT]]: ...  # type: ignore[override]
    def iteritems(self) -> Iterator[tuple[_KT, _VT]]: ...
    def clear(self) -> None: ...

class _ClassNamespaceRegistry(_NamespaceRegistry[str | None, type[ElementBase]]):
    """Registry keyed by ``str | None`` whose values are ``ElementBase`` subclasses.

    This is the type returned by ``ElementNamespaceClassLookup.get_namespace``.
    """

class ElementNamespaceClassLookup(FallbackElementClassLookup):
    """Fallback-capable lookup stub that exposes per-namespace class registries."""

    def __init__(self, fallback: ElementClassLookup | None = None) -> None:
        """Declare the constructor: an optional fallback lookup."""
        ...

    def get_namespace(self, ns_uri: str | None) -> _ClassNamespaceRegistry:
        """Return the class registry for *ns_uri* (``None`` is accepted)."""
        ...
81 changes: 81 additions & 0 deletions typings/lxml/etree/_parser.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# pyright: reportPrivateUsage=false

from __future__ import annotations

from typing import Literal

from ._classlookup import ElementClassLookup
from .._types import _ET_co, _NSMapArg, _TagName, SupportsLaxedItems

class HTMLParser:
    """Stub for ``lxml.etree.HTMLParser``.

    All constructor options are keyword-only.
    """

    def __init__(
        self,
        *,
        encoding: str | None = None,
        remove_blank_text: bool = False,
        remove_comments: bool = False,
        remove_pis: bool = False,
        strip_cdata: bool = True,
        no_network: bool = True,
        recover: bool = True,
        compact: bool = True,
        default_doctype: bool = True,
        collect_ids: bool = True,
        huge_tree: bool = False,
    ) -> None:
        """Declare the parser's keyword-only configuration options."""
        ...

    def set_element_class_lookup(self, lookup: ElementClassLookup | None = None) -> None:
        """Stub accepting an ``ElementClassLookup`` or ``None`` (the default)."""
        ...

class XMLParser:
    """Stub for ``lxml.etree.XMLParser``.

    All constructor options are keyword-only.
    """

    def __init__(
        self,
        *,
        attribute_defaults: bool = False,
        collect_ids: bool = True,
        compact: bool = True,
        dtd_validation: bool = False,
        encoding: str | None = None,
        huge_tree: bool = False,
        load_dtd: bool = False,
        no_network: bool = True,
        ns_clean: bool = False,
        recover: bool = False,
        remove_blank_text: bool = False,
        remove_comments: bool = False,
        remove_pis: bool = False,
        resolve_entities: bool | Literal["internal"] = "internal",
        strip_cdata: bool = True,
    ) -> None:
        """Declare the parser's keyword-only configuration options."""
        ...

    # NOTE(review): the return type is the ``_ET_co`` type variable although this
    # class is not declared generic here -- confirm this is intended.
    def makeelement(
        self,
        _tag: _TagName,
        /,
        attrib: SupportsLaxedItems[str, str] | None = None,
        nsmap: _NSMapArg | None = None,
        **_extra: str,
    ) -> _ET_co:
        """Stub: takes a positional-only tag plus optional `attrib`, `nsmap` and keywords."""
        ...

    def set_element_class_lookup(self, lookup: ElementClassLookup | None = None) -> None:
        """Stub accepting an ``ElementClassLookup`` or ``None`` (the default).

        Notes
        -----
        When calling this method, it is advised to also change typing
        specialization of concerned parser too, because current python
        typing system can't change it automatically.

        Example
        -------
        Following code demonstrates how to create ``lxml.html.HTMLParser``
        manually from ``lxml.etree.HTMLParser``:

        ```python
        parser = etree.HTMLParser()
        reveal_type(parser)  # HTMLParser[_Element]
        if TYPE_CHECKING:
            parser = cast('etree.HTMLParser[HtmlElement]', parser)
        else:
            parser.set_element_class_lookup(
                html.HtmlElementClassLookup())
        result = etree.fromstring(data, parser=parser)
        reveal_type(result)  # HtmlElement
        ```
        """
        ...

0 comments on commit 7efa08d

Please sign in to comment.