Skip to content
This repository was archived by the owner on Feb 6, 2019. It is now read-only.

Commit 1fdfc03

Browse files
committed
Use recommended html5lib API for parsing
1 parent 6ed703d commit 1fdfc03

File tree

1 file changed: 1 addition and 8 deletions.

w3ctestlib/HTMLSource.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
# Licensed under BSD 3-Clause: <http://www.w3.org/Consortium/Legal/2008/03-bsd-license>
55

66
import html5lib
7-
from html5lib import treebuilders
87
from lxml import etree
98
import HTMLSerializer
109
import warnings
@@ -21,9 +20,6 @@
2120
class HTMLSource(XMLSource):
2221
"""FileSource object with support for HTML metadata and HTML->XHTML conversions (untested)."""
2322

24-
# Private Data and Methods
25-
__parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder('lxml'))
26-
2723
# Public Methods
2824

2925
def __init__(self, sourceTree, sourcepath, relpath, data=None):
@@ -39,10 +35,7 @@ def parse(self):
3935
if data:
4036
with warnings.catch_warnings():
4137
warnings.simplefilter("ignore")
42-
htmlStream = html5lib.inputstream.HTMLInputStream(data)
43-
if ('utf-8-sig' != self.encoding): # if we found a BOM, respect it
44-
self.encoding = htmlStream.detectEncoding()[0]
45-
self.tree = self.__parser.parse(data, encoding=self.encoding)
38+
self.tree = html5lib.parse(data, treebuilder="lxml")
4639
self.injectedTags = {}
4740
else:
4841
self.tree = None

0 commit comments

Comments (0)