scrapy · Gallaecio · Mar 24, 2021 · Mar 20, 2021 · Mar 20, 2021 · Mar 20, 2021
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -18,13 +18,13 @@ jobs:
         - python-version: 3.7
           env:
             TOXENV: docs
-        - python-version: 3.8
+        - python-version: 3.9
           env:
             TOXENV: flake8
-        - python-version: 3.8
+        - python-version: 3.9
           env:
             TOXENV: pylint
-        - python-version: 3.8
+        - python-version: 3.9
           env:
             TOXENV: security
 

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -12,10 +12,10 @@ jobs:
     steps:
     - uses: actions/checkout@v2
 
-    - name: Set up Python 3.8
+    - name: Set up Python 3.9
       uses: actions/setup-python@v2
       with:
-        python-version: 3.8
+        python-version: 3.9
 
     - name: Check Tag
       id: check-release-tag

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: [2.7, 3.5, 3.6, 3.7, 3.8, pypy3]
+        python-version: [3.6, 3.7, 3.8, 3.9, pypy3]
 
     steps:
     - uses: actions/checkout@v2

diff --git a/README.rst b/README.rst
@@ -27,7 +27,7 @@ This is a Python library of web-related functions, such as:
 Requirements
 ============
 
-Python 2.7 or Python 3.5+
+Python 3.6+
 
 Install
 =======

diff --git a/docs/conf.py b/docs/conf.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # w3lib documentation build configuration file, created by
 # sphinx-quickstart on Sun Jan 26 22:19:38 2014.
 #
@@ -47,8 +45,8 @@
 master_doc = 'index'
 
 # General information about the project.
-project = u'w3lib'
-copyright = u'2014, w3lib developers'
+project = 'w3lib'
+copyright = '2014, w3lib developers'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -190,8 +188,8 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-  ('index', 'w3lib.tex', u'w3lib Documentation',
-   u'w3lib developers', 'manual'),
+  ('index', 'w3lib.tex', 'w3lib Documentation',
+   'w3lib developers', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -220,8 +218,8 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'w3lib', u'w3lib Documentation',
-     [u'w3lib developers'], 1)
+    ('index', 'w3lib', 'w3lib Documentation',
+     ['w3lib developers'], 1)
 ]
 
 # If true, show URL addresses after external links.
@@ -234,8 +232,8 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-  ('index', 'w3lib', u'w3lib Documentation',
-   u'w3lib developers', 'w3lib', 'One line description of project.',
+  ('index', 'w3lib', 'w3lib Documentation',
+   'w3lib developers', 'w3lib', 'One line description of project.',
    'Miscellaneous'),
 ]
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -28,7 +28,7 @@ Modules
 Requirements
 ============
 
-Python 2.7 or Python 3.3+
+Python 3.6+
 
 Install
 =======

diff --git a/pytest.ini b/pytest.ini
@@ -1,6 +1,8 @@
 [pytest]
 doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES
 flake8-ignore =
+    W503  # https://www.flake8rules.com/rules/W503.html
+
     docs/conf.py E121 E122 E265 E401 E501
     tests/test_encoding.py E128 E221 E241 E302 E401 E501 E731
     tests/test_form.py E265 E501

diff --git a/setup.py b/setup.py
@@ -18,16 +18,13 @@
         'License :: OSI Approved :: BSD License',
         'Operating System :: OS Independent',
         'Programming Language :: Python',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
         'Programming Language :: Python :: Implementation :: CPython',
         'Programming Language :: Python :: Implementation :: PyPy',
         'Topic :: Internet :: WWW/HTTP',
     ],
-    install_requires=['six >= 1.4.1'],
 )
diff --git a/stdeb.cfg b/stdeb.cfg
diff --git a/tests/py3-ignores.txt b/tests/py3-ignores.txt
diff --git a/tests/test_encoding.py b/tests/test_encoding.py
@@ -1,7 +1,14 @@
-import unittest, codecs
-import six
-from w3lib.encoding import (html_body_declared_encoding, read_bom, to_unicode,
-        http_content_type_encoding, resolve_encoding, html_to_unicode)
+import codecs
+import unittest
+
+from w3lib.encoding import (
+    html_body_declared_encoding,
+    http_content_type_encoding,
+    html_to_unicode,
+    read_bom,
+    resolve_encoding,
+    to_unicode,
+)
 
 class RequestEncodingTests(unittest.TestCase):
     utf8_fragments = [
@@ -22,7 +29,7 @@ class RequestEncodingTests(unittest.TestCase):
 
     def test_bom(self):
         # cjk water character in unicode
-        water_unicode = u'\u6C34'
+        water_unicode = '\u6C34'
         # BOM + water character encoded
         utf16be = b'\xfe\xff\x6c\x34'
         utf16le = b'\xff\xfe\x34\x6c'
@@ -62,19 +69,19 @@ def test_html_body_declared_encoding(self):
 
     def test_html_body_declared_encoding_unicode(self):
         # html_body_declared_encoding should work when unicode body is passed
-        self.assertEqual(None, html_body_declared_encoding(u"something else"))
+        self.assertEqual(None, html_body_declared_encoding("something else"))
 
         for fragment in self.utf8_fragments:
             encoding = html_body_declared_encoding(fragment.decode('utf8'))
             self.assertEqual(encoding, 'utf-8', fragment)
 
-        self.assertEqual(None, html_body_declared_encoding(u"""
+        self.assertEqual(None, html_body_declared_encoding("""
             <head></head><body>
             this isn't searched
             <meta charset="utf-8">
         """))
         self.assertEqual(None, html_body_declared_encoding(
-            u"""<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""))
+            """<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""))
 
 
 class CodecsEncodingTestCase(unittest.TestCase):
@@ -88,10 +95,10 @@ def test_resolve_encoding(self):
 class UnicodeDecodingTestCase(unittest.TestCase):
 
     def test_utf8(self):
-        self.assertEqual(to_unicode(b'\xc2\xa3', 'utf-8'), u'\xa3')
+        self.assertEqual(to_unicode(b'\xc2\xa3', 'utf-8'), '\xa3')
 
     def test_invalid_utf8(self):
-        self.assertEqual(to_unicode(b'\xc2\xc2\xa3', 'utf-8'), u'\ufffd\xa3')
+        self.assertEqual(to_unicode(b'\xc2\xc2\xa3', 'utf-8'), '\ufffd\xa3')
 
 
 def ct(charset):
@@ -103,22 +110,22 @@ def norm_encoding(enc):
 class HtmlConversionTests(unittest.TestCase):
 
     def test_unicode_body(self):
-        unicode_string = u'\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442'
+        unicode_string = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442'
         original_string = unicode_string.encode('cp1251')
         encoding, body_unicode = html_to_unicode(ct('cp1251'), original_string)
         # check body_as_unicode
-        self.assertTrue(isinstance(body_unicode, six.text_type))
+        self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(body_unicode, unicode_string)
 
     def _assert_encoding(self, content_type, body, expected_encoding,
                 expected_unicode):
-        assert not isinstance(body, six.text_type)
+        assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body)
-        self.assertTrue(isinstance(body_unicode, six.text_type))
+        self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(norm_encoding(encoding),
                 norm_encoding(expected_encoding))
 
-        if isinstance(expected_unicode, six.string_types):
+        if isinstance(expected_unicode, str):
             self.assertEqual(body_unicode, expected_unicode)
         else:
             self.assertTrue(
@@ -130,23 +137,23 @@ def test_content_type_and_conversion(self):
         """Test content type header is interpreted and text converted as
         expected
         """
-        self._assert_encoding('utf-8', b"\xc2\xa3", 'utf-8', u"\xa3")
+        self._assert_encoding('utf-8', b"\xc2\xa3", 'utf-8', "\xa3")
         # something like this in the scrapy tests - but that's invalid?
-        # self._assert_encoding('', "\xa3", 'utf-8', u"\xa3")
+        # self._assert_encoding('', "\xa3", 'utf-8', "\xa3")
         # iso-8859-1 is overridden to cp1252
-        self._assert_encoding('iso-8859-1', b"\xa3", 'cp1252', u"\xa3")
-        self._assert_encoding('', b"\xc2\xa3", 'utf-8', u"\xa3")
-        self._assert_encoding('none', b"\xc2\xa3", 'utf-8', u"\xa3")
-        self._assert_encoding('gb2312', b"\xa8D", 'gb18030', u"\u2015")
-        self._assert_encoding('gbk', b"\xa8D", 'gb18030', u"\u2015")
-        self._assert_encoding('big5', b"\xf9\xda", 'big5hkscs', u"\u6052")
+        self._assert_encoding('iso-8859-1', b"\xa3", 'cp1252', "\xa3")
+        self._assert_encoding('', b"\xc2\xa3", 'utf-8', "\xa3")
+        self._assert_encoding('none', b"\xc2\xa3", 'utf-8', "\xa3")
+        self._assert_encoding('gb2312', b"\xa8D", 'gb18030', "\u2015")
+        self._assert_encoding('gbk', b"\xa8D", 'gb18030', "\u2015")
+        self._assert_encoding('big5', b"\xf9\xda", 'big5hkscs', "\u6052")
 
     def test_invalid_utf8_encoded_body_with_valid_utf8_BOM(self):
         # unlike scrapy, the BOM is stripped
         self._assert_encoding('utf-8', b"\xef\xbb\xbfWORD\xe3\xabWORD2",
-                'utf-8', u'WORD\ufffdWORD2')
+                'utf-8', 'WORD\ufffdWORD2')
         self._assert_encoding(None, b"\xef\xbb\xbfWORD\xe3\xabWORD2",
-                'utf-8', u'WORD\ufffdWORD2')
+                'utf-8', 'WORD\ufffdWORD2')
 
     def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self):
         # Python implementations handle unexpected end of UTF8 data
@@ -156,69 +163,69 @@ def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self):
 
         # unlike scrapy, the BOM is stripped
         self._assert_encoding('utf-8', b"\xef\xbb\xbfWORD\xe3\xab",
-                'utf-8', [u'WORD\ufffd\ufffd', u'WORD\ufffd'])
+                'utf-8', ['WORD\ufffd\ufffd', 'WORD\ufffd'])
         self._assert_encoding(None, b"\xef\xbb\xbfWORD\xe3\xab",
-                'utf-8', [u'WORD\ufffd\ufffd', u'WORD\ufffd'])
+                'utf-8', ['WORD\ufffd\ufffd', 'WORD\ufffd'])
 
     def test_replace_wrong_encoding(self):
         """Test invalid chars are replaced properly"""
         encoding, body_unicode = html_to_unicode(ct('utf-8'),
                 b'PREFIX\xe3\xabSUFFIX')
         # XXX: Policy for replacing invalid chars may suffer minor variations
-        # but it should always contain the unicode replacement char (u'\ufffd')
-        assert u'\ufffd' in body_unicode, repr(body_unicode)
-        assert u'PREFIX' in body_unicode, repr(body_unicode)
-        assert u'SUFFIX' in body_unicode, repr(body_unicode)
+        # but it should always contain the unicode replacement char ('\ufffd')
+        assert '\ufffd' in body_unicode, repr(body_unicode)
+        assert 'PREFIX' in body_unicode, repr(body_unicode)
+        assert 'SUFFIX' in body_unicode, repr(body_unicode)
 
         # Do not destroy html tags due to encoding bugs
         encoding, body_unicode = html_to_unicode(ct('utf-8'),
             b'\xf0<span>value</span>')
-        assert u'<span>value</span>' in body_unicode, repr(body_unicode)
+        assert '<span>value</span>' in body_unicode, repr(body_unicode)
 
     def _assert_encoding_detected(self, content_type, expected_encoding, body,
             **kwargs):
-        assert not isinstance(body, six.text_type)
+        assert not isinstance(body, str)
         encoding, body_unicode  = html_to_unicode(ct(content_type), body, **kwargs)
-        self.assertTrue(isinstance(body_unicode, six.text_type))
+        self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(norm_encoding(encoding),  norm_encoding(expected_encoding))
 
     def test_BOM(self):
         # utf-16 cases already tested, as is the BOM detection function
 
         # http header takes precedence, irrespective of BOM
-        bom_be_str = codecs.BOM_UTF16_BE + u"hi".encode('utf-16-be')
-        expected = u'\ufffd\ufffd\x00h\x00i'
+        bom_be_str = codecs.BOM_UTF16_BE + "hi".encode('utf-16-be')
+        expected = '\ufffd\ufffd\x00h\x00i'
         self._assert_encoding('utf-8', bom_be_str, 'utf-8', expected)
 
         # BOM is stripped when it agrees with the encoding, or used to
         # determine encoding
         bom_utf8_str = codecs.BOM_UTF8 + b'hi'
-        self._assert_encoding('utf-8', bom_utf8_str, 'utf-8', u"hi")
-        self._assert_encoding(None, bom_utf8_str, 'utf-8', u"hi")
+        self._assert_encoding('utf-8', bom_utf8_str, 'utf-8', "hi")
+        self._assert_encoding(None, bom_utf8_str, 'utf-8', "hi")
 
     def test_utf16_32(self):
         # tools.ietf.org/html/rfc2781 section 4.3
 
         # USE BOM and strip it
-        bom_be_str = codecs.BOM_UTF16_BE + u"hi".encode('utf-16-be')
-        self._assert_encoding('utf-16', bom_be_str, 'utf-16-be', u"hi")
-        self._assert_encoding(None, bom_be_str, 'utf-16-be', u"hi")
+        bom_be_str = codecs.BOM_UTF16_BE + "hi".encode('utf-16-be')
+        self._assert_encoding('utf-16', bom_be_str, 'utf-16-be', "hi")
+        self._assert_encoding(None, bom_be_str, 'utf-16-be', "hi")
 
-        bom_le_str = codecs.BOM_UTF16_LE + u"hi".encode('utf-16-le')
-        self._assert_encoding('utf-16', bom_le_str, 'utf-16-le', u"hi")
-        self._assert_encoding(None, bom_le_str, 'utf-16-le', u"hi")
+        bom_le_str = codecs.BOM_UTF16_LE + "hi".encode('utf-16-le')
+        self._assert_encoding('utf-16', bom_le_str, 'utf-16-le', "hi")
+        self._assert_encoding(None, bom_le_str, 'utf-16-le', "hi")
 
-        bom_be_str = codecs.BOM_UTF32_BE + u"hi".encode('utf-32-be')
-        self._assert_encoding('utf-32', bom_be_str, 'utf-32-be', u"hi")
-        self._assert_encoding(None, bom_be_str, 'utf-32-be', u"hi")
+        bom_be_str = codecs.BOM_UTF32_BE + "hi".encode('utf-32-be')
+        self._assert_encoding('utf-32', bom_be_str, 'utf-32-be', "hi")
+        self._assert_encoding(None, bom_be_str, 'utf-32-be', "hi")
 
-        bom_le_str = codecs.BOM_UTF32_LE + u"hi".encode('utf-32-le')
-        self._assert_encoding('utf-32', bom_le_str, 'utf-32-le', u"hi")
-        self._assert_encoding(None, bom_le_str, 'utf-32-le', u"hi")
+        bom_le_str = codecs.BOM_UTF32_LE + "hi".encode('utf-32-le')
+        self._assert_encoding('utf-32', bom_le_str, 'utf-32-le', "hi")
+        self._assert_encoding(None, bom_le_str, 'utf-32-le', "hi")
 
         # if there is no BOM,  big endian should be chosen
-        self._assert_encoding('utf-16', u"hi".encode('utf-16-be'), 'utf-16-be', u"hi")
-        self._assert_encoding('utf-32', u"hi".encode('utf-32-be'), 'utf-32-be', u"hi")
+        self._assert_encoding('utf-16', "hi".encode('utf-16-be'), 'utf-16-be', "hi")
+        self._assert_encoding('utf-32', "hi".encode('utf-32-be'), 'utf-32-be', "hi")
 
     def test_python_crash(self):
         import random