
Commit f8313e2

Migrate to ruff.
1 parent 6057cac commit f8313e2

17 files changed: +211, -121 lines

.bandit.yml

Lines changed: 0 additions & 5 deletions
This file was deleted.

.flake8

Lines changed: 0 additions & 2 deletions
This file was deleted.

.isort.cfg

Lines changed: 0 additions & 2 deletions
This file was deleted.

.pre-commit-config.yaml

Lines changed: 5 additions & 21 deletions
@@ -1,23 +1,7 @@
 repos:
-  - repo: https://github.com/PyCQA/bandit
-    rev: 1.8.2
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.3
     hooks:
-      - id: bandit
-        args: [-r, -c, .bandit.yml]
-  - repo: https://github.com/PyCQA/flake8
-    rev: 7.1.1
-    hooks:
-      - id: flake8
-  - repo: https://github.com/psf/black.git
-    rev: 24.10.0
-    hooks:
-      - id: black
-  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.19.1
-    hooks:
-      - id: pyupgrade
-        args: [--py39-plus]
+      - id: ruff
+        args: [ --fix ]
+      - id: ruff-format

docs/conf.py

Lines changed: 2 additions & 2 deletions
@@ -9,13 +9,13 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import os
 import sys
+from pathlib import Path
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, os.path.abspath(".."))
+sys.path.insert(0, str(Path(__file__).parent.parent))
 
 # -- General configuration -----------------------------------------------------
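
Side note (not part of the diff): the two forms are not strictly interchangeable. os.path.abspath("..") resolves against the process working directory, while the pathlib version is derived from conf.py's own location, which is the repository root whenever conf.py lives in docs/. A minimal sketch of the difference, assuming that layout:

import os
from pathlib import Path

# Old form: ".." is resolved relative to wherever the build is started from.
cwd_based = os.path.abspath("..")

# New form: derived from this file's own path, so it keeps pointing at the
# repository root even when the build runs from another directory
# (assumes this snippet lives one level below the root, like docs/conf.py).
file_based = str(Path(__file__).parent.parent)

print(cwd_based, file_based)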

pyproject.toml

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
+[tool.ruff.lint]
+extend-select = [
+    # flake8-bugbear
+    "B",
+    # flake8-comprehensions
+    "C4",
+    # pydocstyle
+    "D",
+    # flake8-future-annotations
+    "FA",
+    # flynt
+    "FLY",
+    # refurb
+    "FURB",
+    # isort
+    "I",
+    # flake8-implicit-str-concat
+    "ISC",
+    # flake8-logging
+    "LOG",
+    # Perflint
+    "PERF",
+    # pygrep-hooks
+    "PGH",
+    # flake8-pie
+    "PIE",
+    # pylint
+    "PL",
+    # flake8-use-pathlib
+    "PTH",
+    # flake8-pyi
+    "PYI",
+    # flake8-quotes
+    "Q",
+    # flake8-return
+    "RET",
+    # flake8-raise
+    "RSE",
+    # Ruff-specific rules
+    "RUF",
+    # flake8-bandit
+    "S",
+    # flake8-simplify
+    "SIM",
+    # flake8-slots
+    "SLOT",
+    # flake8-debugger
+    "T10",
+    # flake8-type-checking
+    "TC",
+    # pyupgrade
+    "UP",
+    # pycodestyle warnings
+    "W",
+    # flake8-2020
+    "YTT",
+]
+ignore = [
+    # Within an `except` clause, raise exceptions with `raise ... from`
+    "B904",
+    # Missing docstring in public module
+    "D100",
+    # Missing docstring in public class
+    "D101",
+    # Missing docstring in public method
+    "D102",
+    # Missing docstring in public function
+    "D103",
+    # Missing docstring in public package
+    "D104",
+    # Missing docstring in magic method
+    "D105",
+    # Missing docstring in public nested class
+    "D106",
+    # Missing docstring in __init__
+    "D107",
+    # One-line docstring should fit on one line with quotes
+    "D200",
+    # No blank lines allowed after function docstring
+    "D202",
+    # 1 blank line required between summary line and description
+    "D205",
+    # Multi-line docstring closing quotes should be on a separate line
+    "D209",
+    # First line should end with a period
+    "D400",
+    # First line should be in imperative mood; try rephrasing
+    "D401",
+    # First line should not be the function's "signature"
+    "D402",
+    # First word of the first line should be properly capitalized
+    "D403",
+    # No blank lines allowed between a section header and its content
+    "D412",
+    # Too many return statements
+    "PLR0911",
+    # Too many branches
+    "PLR0912",
+    # Too many arguments in function definition
+    "PLR0913",
+    # Too many statements
+    "PLR0915",
+    # Magic value used in comparison
+    "PLR2004",
+    # String contains ambiguous {}.
+    "RUF001",
+    # Docstring contains ambiguous {}.
+    "RUF002",
+    # Comment contains ambiguous {}.
+    "RUF003",
+    # Mutable class attributes should be annotated with `typing.ClassVar`
+    "RUF012",
+    # Use of `assert` detected
+    "S101",
+]
+
+[tool.ruff.lint.pydocstyle]
+convention = "pep257"
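
For orientation (not part of the diff), the new table can be read back with the standard library; a minimal sketch, assuming Python 3.11+ (for tomllib) and a pyproject.toml in the current directory:

import tomllib

with open("pyproject.toml", "rb") as f:
    config = tomllib.load(f)

lint = config["tool"]["ruff"]["lint"]
print(lint["extend-select"])         # rule families ruff will enforce
print(len(lint["ignore"]))           # rules suppressed project-wide
print(lint["pydocstyle"]["convention"])  # "pep257"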

setup.py

Lines changed: 3 additions & 4 deletions
@@ -1,14 +1,13 @@
-from setuptools import find_packages, setup
+from pathlib import Path
 
-with open("README.rst", encoding="utf-8") as f:
-    long_description = f.read()
+from setuptools import find_packages, setup
 
 setup(
     name="w3lib",
     version="2.3.1",
     license="BSD",
     description="Library of web-related functions",
-    long_description=long_description,
+    long_description=Path("README.rst").read_text(encoding="utf-8"),
     long_description_content_type="text/x-rst",
     author="Scrapy project",
     author_email="info@scrapy.org",
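
The long_description change swaps an explicit open()/read() pair for pathlib; a minimal equivalence sketch (assuming a README.rst next to the script, as in this repository):

from pathlib import Path

# Old style: explicit file handle.
with open("README.rst", encoding="utf-8") as f:
    old_text = f.read()

# New style: pathlib reads and closes the file in one call.
new_text = Path("README.rst").read_text(encoding="utf-8")

assert old_text == new_text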

tests/test_encoding.py

Lines changed: 3 additions & 3 deletions
@@ -33,7 +33,7 @@ class RequestEncodingTests(unittest.TestCase):
 
     def test_bom(self):
         # cjk water character in unicode
-        water_unicode = "\u6C34"
+        water_unicode = "\u6c34"
         # BOM + water character encoded
         utf16be = b"\xfe\xff\x6c\x34"
         utf16le = b"\xff\xfe\x34\x6c"
@@ -273,8 +273,8 @@ def test_python_crash(self):
 
         random.seed(42)
         buf = BytesIO()
-        for i in range(150000):
-            buf.write(bytes([random.randint(0, 255)]))
+        for _ in range(150000):
+            buf.write(bytes([random.randint(0, 255)]))  # noqa: S311
        to_unicode(buf.getvalue(), "utf-16-le")
        to_unicode(buf.getvalue(), "utf-16-be")
        to_unicode(buf.getvalue(), "utf-32-le")
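
The added # noqa: S311 silences ruff's flake8-bandit rule S311, which flags the random module as unsuitable for cryptographic use; here it only generates throwaway test bytes. A short sketch of the distinction, with secrets named as the usual alternative for anything security-sensitive:

import random
import secrets

test_noise = bytes(random.randint(0, 255) for _ in range(16))  # fine for test data; S311 would flag it
token = secrets.token_bytes(16)  # cryptographically strong randomness
print(test_noise, token)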

tests/test_html.py

Lines changed: 2 additions & 3 deletions
@@ -84,8 +84,8 @@ def test_missing_semicolon(self):
             ("&#x41h", "Ah"),
             ("&#65!", "A!"),
             ("&#65x", "Ax"),
-            ("&sup3!", "\u00B3!"),
-            ("&Aacute!", "\u00C1!"),
+            ("&sup3!", "\u00b3!"),
+            ("&Aacute!", "\u00c1!"),
             ("&#9731!", "\u2603!"),
             ("&#153", "\u2122"),
             ("&#x99", "\u2122"),
@@ -325,7 +325,6 @@ def test_with_escape_chars(self):
 
 
 class UnquoteMarkupTest(unittest.TestCase):
-
     sample_txt1 = """<node1>hi, this is sample text with entities: &amp; &copy;
 <![CDATA[although this is inside a cdata! &amp; &quot;]]></node1>"""
     sample_txt2 = (

tests/test_url.py

Lines changed: 9 additions & 10 deletions
@@ -4,6 +4,7 @@
 import sys
 import unittest
 from inspect import isclass
+from pathlib import Path
 from typing import Callable
 from urllib.parse import urlparse
 
@@ -15,7 +16,6 @@
     _ASCII_TAB_OR_NEWLINE,
     _C0_CONTROL_OR_SPACE,
 )
-from w3lib._types import StrOrBytes
 from w3lib._url import _SPECIAL_SCHEMES
 from w3lib.url import (
     add_or_replace_parameter,
@@ -37,7 +37,7 @@
 # input parameters.
 #
 # (encoding, input URL, output URL or exception)
-SAFE_URL_ENCODING_CASES: list[tuple[str | None, StrOrBytes, str | type[Exception]]] = [
+SAFE_URL_ENCODING_CASES: list[tuple[str | None, str | bytes, str | type[Exception]]] = [
     (None, "", ValueError),
     (None, "https://example.com", "https://example.com"),
     (None, "https://example.com/©", "https://example.com/%C2%A9"),
@@ -317,7 +317,7 @@
 
 
 def _test_safe_url_func(
-    url: StrOrBytes,
+    url: str | bytes,
     *,
     encoding: str | None = None,
     output: str | type[Exception],
@@ -336,7 +336,7 @@ def _test_safe_url_func(
 
 
 def _test_safe_url_string(
-    url: StrOrBytes,
+    url: str | bytes,
     *,
     encoding: str | None = None,
     output: str | type[Exception],
@@ -373,7 +373,7 @@ def _test_safe_url_string(
     ),
 )
 def test_safe_url_string_encoding(
-    encoding: str | None, url: StrOrBytes, output: str | type[Exception]
+    encoding: str | None, url: str | bytes, output: str | type[Exception]
 ) -> None:
     _test_safe_url_string(url, encoding=encoding, output=output)
 
@@ -439,7 +439,7 @@ def test_safe_url_string_encoding(
         for case in SAFE_URL_URL_CASES
     ),
 )
-def test_safe_url_string_url(url: StrOrBytes, output: str | type[Exception]) -> None:
+def test_safe_url_string_url(url: str | bytes, output: str | type[Exception]) -> None:
     _test_safe_url_string(url, output=output)
 
 
@@ -993,7 +993,7 @@ def test_add_or_replace_parameters(self):
     def test_add_or_replace_parameters_does_not_change_input_param(self):
         url = "http://domain/test?arg=original"
         input_param = {"arg": "value"}
-        add_or_replace_parameters(url, input_param)  # noqa
+        add_or_replace_parameters(url, input_param)
         self.assertEqual(input_param, {"arg": "value"})
 
     def test_url_query_cleaner(self):
@@ -1099,7 +1099,7 @@ def test_path_to_file_uri(self):
         fn = "test.txt"
         x = path_to_file_uri(fn)
         self.assertTrue(x.startswith("file:///"))
-        self.assertEqual(file_uri_to_path(x).lower(), os.path.abspath(fn).lower())
+        self.assertEqual(file_uri_to_path(x).lower(), str(Path(fn).resolve()).lower())
 
     def test_file_uri_to_path(self):
         if os.name == "nt":
@@ -1580,8 +1580,7 @@ def test_base64(self):
 
     def test_base64_spaces(self):
         result = parse_data_uri(
-            "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20"
-            "dvcm%20%20%20xk%20Lg%3D%0A%3D"
+            "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20dvcm%20%20%20xk%20Lg%3D%0A%3D"
         )
         self.assertEqual(result.media_type, "text/plain")
         self.assertEqual(result.data, b"Hello, world.")
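
The last hunk merges two adjacent string literals into one, presumably to satisfy the implicit-string-concatenation (ISC) rules selected above; Python joins adjacent literals at compile time, so both spellings produce the same value. A tiny sketch:

split = (
    "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20"
    "dvcm%20%20%20xk%20Lg%3D%0A%3D"
)
merged = "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20dvcm%20%20%20xk%20Lg%3D%0A%3D"
assert split == merged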

tests/test_util.py

Lines changed: 2 additions & 2 deletions
@@ -8,10 +8,10 @@
 class ToBytesTestCase(TestCase):
     def test_type_error(self):
         with raises(TypeError):
-            to_bytes(True)  # type: ignore
+            to_bytes(True)  # type: ignore[arg-type]
 
 
 class ToUnicodeTestCase(TestCase):
     def test_type_error(self):
         with raises(TypeError):
-            to_unicode(True)  # type: ignore
+            to_unicode(True)  # type: ignore[arg-type]

w3lib/_types.py

Lines changed: 0 additions & 1 deletion
@@ -4,4 +4,3 @@
 
 # the base class UnicodeError doesn't have attributes like start / end
 AnyUnicodeError = Union[UnicodeEncodeError, UnicodeDecodeError]
-StrOrBytes = Union[str, bytes]

w3lib/encoding.py

Lines changed: 9 additions & 12 deletions
@@ -11,9 +11,9 @@
 from typing import Callable, cast
 
 import w3lib.util
-from w3lib._types import AnyUnicodeError, StrOrBytes
+from w3lib._types import AnyUnicodeError
 
-_HEADER_ENCODING_RE = re.compile(r"charset=([\w-]+)", re.I)
+_HEADER_ENCODING_RE = re.compile(r"charset=([\w-]+)", re.IGNORECASE)
 
 
 def http_content_type_encoding(content_type: str | None) -> str | None:
@@ -52,17 +52,14 @@ def http_content_type_encoding(content_type: str | None) -> str | None:
 _XML_ENCODING_RE = _TEMPLATE % ("encoding", r"(?P<xmlcharset>[\w-]+)")
 
 # check for meta tags, or xml decl. and stop search if a body tag is encountered
-_BODY_ENCODING_PATTERN = (
-    r"<\s*(?:meta%s(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)"
-    % (_SKIP_ATTRS, _HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE)
-)
-_BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE)
+_BODY_ENCODING_PATTERN = rf"<\s*(?:meta{_SKIP_ATTRS}(?:(?:\s+{_HTTPEQUIV_RE}|\s+{_CONTENT_RE}){{2}}|\s+{_CONTENT2_RE})|\?xml\s[^>]+{_XML_ENCODING_RE}|body)"
+_BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.IGNORECASE | re.VERBOSE)
 _BODY_ENCODING_BYTES_RE = re.compile(
-    _BODY_ENCODING_PATTERN.encode("ascii"), re.I | re.VERBOSE
+    _BODY_ENCODING_PATTERN.encode("ascii"), re.IGNORECASE | re.VERBOSE
 )
 
 
-def html_body_declared_encoding(html_body_str: StrOrBytes) -> str | None:
+def html_body_declared_encoding(html_body_str: str | bytes) -> str | None:
     '''Return the encoding specified in meta tags in the html body,
     or ``None`` if no suitable encoding was found
 
@@ -209,9 +206,9 @@ def read_bom(data: bytes) -> tuple[None, None] | tuple[str, bytes]:
 
 
 def to_unicode(data_str: bytes, encoding: str) -> str:
-    """Convert a str object to unicode using the encoding given
+    r"""Convert a str object to unicode using the encoding given
 
-    Characters that cannot be converted will be converted to ``\\ufffd`` (the
+    Characters that cannot be converted will be converted to ``\ufffd`` (the
     unicode replacement character).
     """
     return data_str.decode(encoding, "replace")
@@ -260,7 +257,7 @@ def html_to_unicode(
     that the header was not present.
 
     This method will not fail, if characters cannot be converted to unicode,
-    ``\\ufffd`` (the unicode replacement character) will be inserted instead.
+    ``\ufffd`` (the unicode replacement character) will be inserted instead.
 
     Returns a tuple of ``(<encoding used>, <unicode_string>)``