Skip to content

Commit cc15435

Browse files
authored
Migrate tests from unittest to pure pytest. (#238)
1 parent 6caf279 commit cc15435

File tree

7 files changed

+777
-871
lines changed

7 files changed

+777
-871
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,3 +14,4 @@ coverage.xml
1414
/index.txt
1515
.dmypy.json
1616
.hypothesis/
17+
.idea/

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ filename = "docs/conf.py"
1515

1616
[tool.coverage.run]
1717
branch = true
18-
include = ["w3lib/*"]
1918

2019
[tool.coverage.report]
2120
exclude_also = [
@@ -54,6 +53,7 @@ disable = [
5453
"raise-missing-from",
5554
"redefined-builtin",
5655
"redefined-outer-name",
56+
"too-few-public-methods",
5757
"too-many-arguments",
5858
"too-many-branches",
5959
"too-many-lines",
@@ -91,6 +91,8 @@ extend-select = [
9191
"PIE",
9292
# pylint
9393
"PL",
94+
# flake8-pytest-style
95+
"PT",
9496
# flake8-use-pathlib
9597
"PTH",
9698
# flake8-pyi

tests/test_encoding.py

Lines changed: 42 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
import codecs
4-
import unittest
54
from typing import Any
65

76
from w3lib.encoding import (
@@ -14,7 +13,7 @@
1413
)
1514

1615

17-
class RequestEncodingTests(unittest.TestCase):
16+
class TestRequestEncoding:
1817
utf8_fragments = [
1918
# Content-Type as meta http-equiv
2019
b"""<meta http-equiv="content-type" content="text/html;charset=UTF-8" />""",
@@ -44,84 +43,84 @@ def test_bom(self):
4443
assert bom_encoding is not None
4544
assert bom is not None
4645
decoded = string[len(bom) :].decode(bom_encoding)
47-
self.assertEqual(water_unicode, decoded)
46+
assert water_unicode == decoded
4847
# Body without BOM
4948
enc, bom = read_bom(b"foo")
50-
self.assertEqual(enc, None)
51-
self.assertEqual(bom, None)
49+
assert enc is None
50+
assert bom is None
5251
# Empty body
5352
enc, bom = read_bom(b"")
54-
self.assertEqual(enc, None)
55-
self.assertEqual(bom, None)
53+
assert enc is None
54+
assert bom is None
5655

5756
def test_http_encoding_header(self):
5857
header_value = "Content-Type: text/html; charset=ISO-8859-4"
5958
extracted = http_content_type_encoding(header_value)
60-
self.assertEqual(extracted, "iso8859-4")
61-
self.assertEqual(None, http_content_type_encoding("something else"))
59+
assert extracted == "iso8859-4"
60+
assert http_content_type_encoding("something else") is None
6261

6362
def test_html_body_declared_encoding(self):
6463
for fragment in self.utf8_fragments:
6564
encoding = html_body_declared_encoding(fragment)
66-
self.assertEqual(encoding, "utf-8", fragment)
67-
self.assertEqual(None, html_body_declared_encoding(b"something else"))
68-
self.assertEqual(
69-
None,
65+
assert encoding == "utf-8", fragment
66+
assert None is html_body_declared_encoding(b"something else")
67+
assert (
7068
html_body_declared_encoding(
7169
b"""
7270
<head></head><body>
7371
this isn't searched
7472
<meta charset="utf-8">
7573
"""
76-
),
74+
)
75+
is None
7776
)
78-
self.assertEqual(
79-
None,
77+
assert (
8078
html_body_declared_encoding(
8179
b"""<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""
82-
),
80+
)
81+
is None
8382
)
8483

8584
def test_html_body_declared_encoding_unicode(self):
8685
# html_body_declared_encoding should work when unicode body is passed
87-
self.assertEqual(None, html_body_declared_encoding("something else"))
86+
assert html_body_declared_encoding("something else") is None
8887

8988
for fragment in self.utf8_fragments:
9089
encoding = html_body_declared_encoding(fragment.decode("utf8"))
91-
self.assertEqual(encoding, "utf-8", fragment)
90+
assert encoding == "utf-8", fragment
9291

93-
self.assertEqual(
94-
None,
92+
assert (
9593
html_body_declared_encoding(
9694
"""
9795
<head></head><body>
9896
this isn't searched
9997
<meta charset="utf-8">
10098
"""
101-
),
99+
)
100+
is None
102101
)
103-
self.assertEqual(
104-
None,
102+
assert (
105103
html_body_declared_encoding(
106104
"""<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""
107-
),
105+
)
106+
is None
108107
)
109108

110109

111-
class CodecsEncodingTestCase(unittest.TestCase):
110+
class TestCodecsEncoding:
112111
def test_resolve_encoding(self):
113-
self.assertEqual(resolve_encoding("latin1"), "cp1252")
114-
self.assertEqual(resolve_encoding(" Latin-1"), "cp1252")
115-
self.assertEqual(resolve_encoding("gb_2312-80"), "gb18030")
116-
self.assertEqual(resolve_encoding("unknown encoding"), None)
112+
assert resolve_encoding("latin1") == "cp1252"
113+
assert resolve_encoding(" Latin-1") == "cp1252"
114+
assert resolve_encoding("gb_2312-80") == "gb18030"
115+
assert resolve_encoding("unknown encoding") is None
117116

118117

119-
class UnicodeDecodingTestCase(unittest.TestCase):
118+
class TestUnicodeDecoding:
120119
def test_utf8(self):
121-
self.assertEqual(to_unicode(b"\xc2\xa3", "utf-8"), "\xa3")
120+
assert to_unicode(b"\xc2\xa3", "utf-8") == "\xa3"
122121

123122
def test_invalid_utf8(self):
124-
self.assertEqual(to_unicode(b"\xc2\xc2\xa3", "utf-8"), "\ufffd\xa3")
123+
assert to_unicode(b"\xc2\xc2\xa3", "utf-8") == "\ufffd\xa3"
125124

126125

127126
def ct(charset: str | None) -> str | None:
@@ -132,14 +131,14 @@ def norm_encoding(enc: str) -> str:
132131
return codecs.lookup(enc).name
133132

134133

135-
class HtmlConversionTests(unittest.TestCase):
134+
class TestHtmlConversion:
136135
def test_unicode_body(self):
137136
unicode_string = "\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442"
138137
original_string = unicode_string.encode("cp1251")
139138
encoding, body_unicode = html_to_unicode(ct("cp1251"), original_string)
140139
# check body_as_unicode
141-
self.assertTrue(isinstance(body_unicode, str))
142-
self.assertEqual(body_unicode, unicode_string)
140+
assert isinstance(body_unicode, str)
141+
assert body_unicode == unicode_string
143142

144143
def _assert_encoding(
145144
self,
@@ -150,15 +149,14 @@ def _assert_encoding(
150149
) -> None:
151150
assert not isinstance(body, str)
152151
encoding, body_unicode = html_to_unicode(ct(content_type), body)
153-
self.assertTrue(isinstance(body_unicode, str))
154-
self.assertEqual(norm_encoding(encoding), norm_encoding(expected_encoding))
152+
assert isinstance(body_unicode, str)
153+
assert norm_encoding(encoding) == norm_encoding(expected_encoding)
155154

156155
if isinstance(expected_unicode, str):
157-
self.assertEqual(body_unicode, expected_unicode)
156+
assert body_unicode == expected_unicode
158157
else:
159-
self.assertTrue(
160-
body_unicode in expected_unicode,
161-
f"{body_unicode} is not in {expected_unicode}",
158+
assert body_unicode in expected_unicode, (
159+
f"{body_unicode} is not in {expected_unicode}"
162160
)
163161

164162
def test_content_type_and_conversion(self):
@@ -227,8 +225,8 @@ def _assert_encoding_detected(
227225
) -> None:
228226
assert not isinstance(body, str)
229227
encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs)
230-
self.assertTrue(isinstance(body_unicode, str))
231-
self.assertEqual(norm_encoding(encoding), norm_encoding(expected_encoding))
228+
assert isinstance(body_unicode, str)
229+
assert norm_encoding(encoding) == norm_encoding(expected_encoding)
232230

233231
def test_BOM(self):
234232
# utf-16 cases already tested, as is the BOM detection function

0 commit comments

Comments
 (0)