
Commit f8313e2

Migrate to ruff.
1 parent 6057cac commit f8313e2

17 files changed: +211, -121 lines

.bandit.yml

Lines changed: 0 additions & 5 deletions
This file was deleted.

.flake8

Lines changed: 0 additions & 2 deletions
This file was deleted.

.isort.cfg

Lines changed: 0 additions & 2 deletions
This file was deleted.

.pre-commit-config.yaml

Lines changed: 5 additions & 21 deletions
@@ -1,23 +1,7 @@
 repos:
-  - repo: https://github.com/PyCQA/bandit
-    rev: 1.8.2
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.3
     hooks:
-      - id: bandit
-        args: [-r, -c, .bandit.yml]
-  - repo: https://github.com/PyCQA/flake8
-    rev: 7.1.1
-    hooks:
-      - id: flake8
-  - repo: https://github.com/psf/black.git
-    rev: 24.10.0
-    hooks:
-      - id: black
-  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.19.1
-    hooks:
-      - id: pyupgrade
-        args: [--py39-plus]
+      - id: ruff
+        args: [ --fix ]
+      - id: ruff-format

docs/conf.py

Lines changed: 2 additions & 2 deletions
@@ -9,13 +9,13 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import os
 import sys
+from pathlib import Path
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, os.path.abspath(".."))
+sys.path.insert(0, str(Path(__file__).parent.parent))
 
 # -- General configuration -----------------------------------------------------
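
Side note (not part of the diff): the two forms are not strictly interchangeable. os.path.abspath("..") resolves against the process working directory, while the pathlib version is derived from conf.py's own location, which is the repository root whenever conf.py lives in docs/. A minimal sketch of the difference, assuming that layout:

import os
from pathlib import Path

# Old form: ".." is resolved relative to wherever the build is started from.
cwd_based = os.path.abspath("..")

# New form: derived from this file's own path, so it keeps pointing at the
# repository root even when the build runs from another directory
# (assumes this snippet lives one level below the root, like docs/conf.py).
file_based = str(Path(__file__).parent.parent)

print(cwd_based, file_based)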

pyproject.toml

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
+[tool.ruff.lint]
+extend-select = [
+    # flake8-bugbear
+    "B",
+    # flake8-comprehensions
+    "C4",
+    # pydocstyle
+    "D",
+    # flake8-future-annotations
+    "FA",
+    # flynt
+    "FLY",
+    # refurb
+    "FURB",
+    # isort
+    "I",
+    # flake8-implicit-str-concat
+    "ISC",
+    # flake8-logging
+    "LOG",
+    # Perflint
+    "PERF",
+    # pygrep-hooks
+    "PGH",
+    # flake8-pie
+    "PIE",
+    # pylint
+    "PL",
+    # flake8-use-pathlib
+    "PTH",
+    # flake8-pyi
+    "PYI",
+    # flake8-quotes
+    "Q",
+    # flake8-return
+    "RET",
+    # flake8-raise
+    "RSE",
+    # Ruff-specific rules
+    "RUF",
+    # flake8-bandit
+    "S",
+    # flake8-simplify
+    "SIM",
+    # flake8-slots
+    "SLOT",
+    # flake8-debugger
+    "T10",
+    # flake8-type-checking
+    "TC",
+    # pyupgrade
+    "UP",
+    # pycodestyle warnings
+    "W",
+    # flake8-2020
+    "YTT",
+]
+ignore = [
+    # Within an `except` clause, raise exceptions with `raise ... from`
+    "B904",
+    # Missing docstring in public module
+    "D100",
+    # Missing docstring in public class
+    "D101",
+    # Missing docstring in public method
+    "D102",
+    # Missing docstring in public function
+    "D103",
+    # Missing docstring in public package
+    "D104",
+    # Missing docstring in magic method
+    "D105",
+    # Missing docstring in public nested class
+    "D106",
+    # Missing docstring in __init__
+    "D107",
+    # One-line docstring should fit on one line with quotes
+    "D200",
+    # No blank lines allowed after function docstring
+    "D202",
+    # 1 blank line required between summary line and description
+    "D205",
+    # Multi-line docstring closing quotes should be on a separate line
+    "D209",
+    # First line should end with a period
+    "D400",
+    # First line should be in imperative mood; try rephrasing
+    "D401",
+    # First line should not be the function's "signature"
+    "D402",
+    # First word of the first line should be properly capitalized
+    "D403",
+    # No blank lines allowed between a section header and its content
+    "D412",
+    # Too many return statements
+    "PLR0911",
+    # Too many branches
+    "PLR0912",
+    # Too many arguments in function definition
+    "PLR0913",
+    # Too many statements
+    "PLR0915",
+    # Magic value used in comparison
+    "PLR2004",
+    # String contains ambiguous {}.
+    "RUF001",
+    # Docstring contains ambiguous {}.
+    "RUF002",
+    # Comment contains ambiguous {}.
+    "RUF003",
+    # Mutable class attributes should be annotated with `typing.ClassVar`
+    "RUF012",
+    # Use of `assert` detected
+    "S101",
+]
+
+[tool.ruff.lint.pydocstyle]
+convention = "pep257"
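
For orientation (not part of the diff), the new table can be read back with the standard library; a minimal sketch, assuming Python 3.11+ (for tomllib) and a pyproject.toml in the current directory:

import tomllib

with open("pyproject.toml", "rb") as f:
    config = tomllib.load(f)

lint = config["tool"]["ruff"]["lint"]
print(lint["extend-select"])         # rule families ruff will enforce
print(len(lint["ignore"]))           # rules suppressed project-wide
print(lint["pydocstyle"]["convention"])  # "pep257"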

setup.py

Lines changed: 3 additions & 4 deletions
@@ -1,14 +1,13 @@
-from setuptools import find_packages, setup
+from pathlib import Path
 
-with open("README.rst", encoding="utf-8") as f:
-    long_description = f.read()
+from setuptools import find_packages, setup
 
 setup(
     name="w3lib",
     version="2.3.1",
     license="BSD",
     description="Library of web-related functions",
-    long_description=long_description,
+    long_description=Path("README.rst").read_text(encoding="utf-8"),
     long_description_content_type="text/x-rst",
     author="Scrapy project",
     author_email="info@scrapy.org",
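
The long_description change swaps an explicit open()/read() pair for pathlib; a minimal equivalence sketch (assuming a README.rst next to the script, as in this repository):

from pathlib import Path

# Old style: explicit file handle.
with open("README.rst", encoding="utf-8") as f:
    old_text = f.read()

# New style: pathlib reads and closes the file in one call.
new_text = Path("README.rst").read_text(encoding="utf-8")

assert old_text == new_text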

tests/test_encoding.py

Lines changed: 3 additions & 3 deletions
@@ -33,7 +33,7 @@ class RequestEncodingTests(unittest.TestCase):
 
     def test_bom(self):
         # cjk water character in unicode
-        water_unicode = "\u6C34"
+        water_unicode = "\u6c34"
         # BOM + water character encoded
         utf16be = b"\xfe\xff\x6c\x34"
         utf16le = b"\xff\xfe\x34\x6c"
@@ -273,8 +273,8 @@ def test_python_crash(self):
 
         random.seed(42)
         buf = BytesIO()
-        for i in range(150000):
-            buf.write(bytes([random.randint(0, 255)]))
+        for _ in range(150000):
+            buf.write(bytes([random.randint(0, 255)]))  # noqa: S311
        to_unicode(buf.getvalue(), "utf-16-le")
        to_unicode(buf.getvalue(), "utf-16-be")
        to_unicode(buf.getvalue(), "utf-32-le")
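
The added # noqa: S311 silences ruff's flake8-bandit rule S311, which flags the random module as unsuitable for cryptographic use; here it only generates throwaway test bytes. A short sketch of the distinction, with secrets named as the usual alternative for anything security-sensitive:

import random
import secrets

test_noise = bytes(random.randint(0, 255) for _ in range(16))  # fine for test data; S311 would flag it
token = secrets.token_bytes(16)  # cryptographically strong randomness
print(test_noise, token)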

tests/test_html.py

Lines changed: 2 additions & 3 deletions
@@ -84,8 +84,8 @@ def test_missing_semicolon(self):
             ("&#x41h", "Ah"),
             ("&#65!", "A!"),
             ("&#65x", "Ax"),
-            ("&sup3!", "\u00B3!"),
-            ("&Aacute!", "\u00C1!"),
+            ("&sup3!", "\u00b3!"),
+            ("&Aacute!", "\u00c1!"),
             ("&#9731!", "\u2603!"),
             ("&#153", "\u2122"),
             ("&#x99", "\u2122"),
@@ -325,7 +325,6 @@ def test_with_escape_chars(self):
 
 
 class UnquoteMarkupTest(unittest.TestCase):
-
     sample_txt1 = """<node1>hi, this is sample text with entities: &amp; &copy;
 <![CDATA[although this is inside a cdata! &amp; &quot;]]></node1>"""
     sample_txt2 = (

tests/test_url.py

Lines changed: 9 additions & 10 deletions
@@ -4,6 +4,7 @@
 import sys
 import unittest
 from inspect import isclass
+from pathlib import Path
 from typing import Callable
 from urllib.parse import urlparse
 
@@ -15,7 +16,6 @@
     _ASCII_TAB_OR_NEWLINE,
     _C0_CONTROL_OR_SPACE,
 )
-from w3lib._types import StrOrBytes
 from w3lib._url import _SPECIAL_SCHEMES
 from w3lib.url import (
     add_or_replace_parameter,
@@ -37,7 +37,7 @@
 # input parameters.
 #
 # (encoding, input URL, output URL or exception)
-SAFE_URL_ENCODING_CASES: list[tuple[str | None, StrOrBytes, str | type[Exception]]] = [
+SAFE_URL_ENCODING_CASES: list[tuple[str | None, str | bytes, str | type[Exception]]] = [
     (None, "", ValueError),
     (None, "https://example.com", "https://example.com"),
     (None, "https://example.com/©", "https://example.com/%C2%A9"),
@@ -317,7 +317,7 @@
 
 
 def _test_safe_url_func(
-    url: StrOrBytes,
+    url: str | bytes,
     *,
     encoding: str | None = None,
     output: str | type[Exception],
@@ -336,7 +336,7 @@ def _test_safe_url_func(
 
 
 def _test_safe_url_string(
-    url: StrOrBytes,
+    url: str | bytes,
     *,
     encoding: str | None = None,
     output: str | type[Exception],
@@ -373,7 +373,7 @@ def _test_safe_url_string(
     ),
 )
 def test_safe_url_string_encoding(
-    encoding: str | None, url: StrOrBytes, output: str | type[Exception]
+    encoding: str | None, url: str | bytes, output: str | type[Exception]
 ) -> None:
     _test_safe_url_string(url, encoding=encoding, output=output)
 
@@ -439,7 +439,7 @@ def test_safe_url_string_encoding(
         for case in SAFE_URL_URL_CASES
     ),
 )
-def test_safe_url_string_url(url: StrOrBytes, output: str | type[Exception]) -> None:
+def test_safe_url_string_url(url: str | bytes, output: str | type[Exception]) -> None:
     _test_safe_url_string(url, output=output)
 
 
@@ -993,7 +993,7 @@ def test_add_or_replace_parameters(self):
     def test_add_or_replace_parameters_does_not_change_input_param(self):
         url = "http://domain/test?arg=original"
         input_param = {"arg": "value"}
-        add_or_replace_parameters(url, input_param)  # noqa
+        add_or_replace_parameters(url, input_param)
         self.assertEqual(input_param, {"arg": "value"})
 
     def test_url_query_cleaner(self):
@@ -1099,7 +1099,7 @@ def test_path_to_file_uri(self):
         fn = "test.txt"
         x = path_to_file_uri(fn)
         self.assertTrue(x.startswith("file:///"))
-        self.assertEqual(file_uri_to_path(x).lower(), os.path.abspath(fn).lower())
+        self.assertEqual(file_uri_to_path(x).lower(), str(Path(fn).resolve()).lower())
 
     def test_file_uri_to_path(self):
         if os.name == "nt":
@@ -1580,8 +1580,7 @@ def test_base64(self):
 
     def test_base64_spaces(self):
         result = parse_data_uri(
-            "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20"
-            "dvcm%20%20%20xk%20Lg%3D%0A%3D"
+            "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20dvcm%20%20%20xk%20Lg%3D%0A%3D"
         )
         self.assertEqual(result.media_type, "text/plain")
         self.assertEqual(result.data, b"Hello, world.")
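
The last hunk merges two adjacent string literals into one, presumably to satisfy the implicit-string-concatenation (ISC) rules selected above; Python joins adjacent literals at compile time, so both spellings produce the same value. A tiny sketch:

split = (
    "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20"
    "dvcm%20%20%20xk%20Lg%3D%0A%3D"
)
merged = "data:text/plain;base64,SGVsb%20G8sIH%0A%20%20dvcm%20%20%20xk%20Lg%3D%0A%3D"
assert split == merged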

tests/test_util.py

Lines changed: 2 additions & 2 deletions
@@ -8,10 +8,10 @@
 class ToBytesTestCase(TestCase):
     def test_type_error(self):
         with raises(TypeError):
-            to_bytes(True)  # type: ignore
+            to_bytes(True)  # type: ignore[arg-type]
 
 
 class ToUnicodeTestCase(TestCase):
     def test_type_error(self):
         with raises(TypeError):
-            to_unicode(True)  # type: ignore
+            to_unicode(True)  # type: ignore[arg-type]

w3lib/_types.py

Lines changed: 0 additions & 1 deletion
@@ -4,4 +4,3 @@
 
 # the base class UnicodeError doesn't have attributes like start / end
 AnyUnicodeError = Union[UnicodeEncodeError, UnicodeDecodeError]
-StrOrBytes = Union[str, bytes]

w3lib/encoding.py

Lines changed: 9 additions & 12 deletions
@@ -11,9 +11,9 @@
 from typing import Callable, cast
 
 import w3lib.util
-from w3lib._types import AnyUnicodeError, StrOrBytes
+from w3lib._types import AnyUnicodeError
 
-_HEADER_ENCODING_RE = re.compile(r"charset=([\w-]+)", re.I)
+_HEADER_ENCODING_RE = re.compile(r"charset=([\w-]+)", re.IGNORECASE)
 
 
 def http_content_type_encoding(content_type: str | None) -> str | None:
@@ -52,17 +52,14 @@ def http_content_type_encoding(content_type: str | None) -> str | None:
 _XML_ENCODING_RE = _TEMPLATE % ("encoding", r"(?P<xmlcharset>[\w-]+)")
 
 # check for meta tags, or xml decl. and stop search if a body tag is encountered
-_BODY_ENCODING_PATTERN = (
-    r"<\s*(?:meta%s(?:(?:\s+%s|\s+%s){2}|\s+%s)|\?xml\s[^>]+%s|body)"
-    % (_SKIP_ATTRS, _HTTPEQUIV_RE, _CONTENT_RE, _CONTENT2_RE, _XML_ENCODING_RE)
-)
-_BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.I | re.VERBOSE)
+_BODY_ENCODING_PATTERN = rf"<\s*(?:meta{_SKIP_ATTRS}(?:(?:\s+{_HTTPEQUIV_RE}|\s+{_CONTENT_RE}){{2}}|\s+{_CONTENT2_RE})|\?xml\s[^>]+{_XML_ENCODING_RE}|body)"
+_BODY_ENCODING_STR_RE = re.compile(_BODY_ENCODING_PATTERN, re.IGNORECASE | re.VERBOSE)
 _BODY_ENCODING_BYTES_RE = re.compile(
-    _BODY_ENCODING_PATTERN.encode("ascii"), re.I | re.VERBOSE
+    _BODY_ENCODING_PATTERN.encode("ascii"), re.IGNORECASE | re.VERBOSE
 )
 
 
-def html_body_declared_encoding(html_body_str: StrOrBytes) -> str | None:
+def html_body_declared_encoding(html_body_str: str | bytes) -> str | None:
     '''Return the encoding specified in meta tags in the html body,
     or ``None`` if no suitable encoding was found
 
@@ -209,9 +206,9 @@ def read_bom(data: bytes) -> tuple[None, None] | tuple[str, bytes]:
 
 
 def to_unicode(data_str: bytes, encoding: str) -> str:
-    """Convert a str object to unicode using the encoding given
+    r"""Convert a str object to unicode using the encoding given
 
-    Characters that cannot be converted will be converted to ``\\ufffd`` (the
+    Characters that cannot be converted will be converted to ``\ufffd`` (the
     unicode replacement character).
     """
     return data_str.decode(encoding, "replace")
@@ -260,7 +257,7 @@ def html_to_unicode(
     that the header was not present.
 
     This method will not fail, if characters cannot be converted to unicode,
-    ``\\ufffd`` (the unicode replacement character) will be inserted instead.
+    ``\ufffd`` (the unicode replacement character) will be inserted instead.
 
     Returns a tuple of ``(<encoding used>, <unicode_string>)``