Commit 9fb7b36: Merge branch 'w3lib-warnings'

dangra committed Aug 4, 2014
2 parents: 511a269 + 3b64b24

Showing 7 changed files with 10 additions and 10 deletions.
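
Background for the diffs below: w3lib 1.8.0 renamed remove_entities to replace_entities (the helper decodes entities into characters rather than stripping them, so the new name matches the behavior) and kept the old name as a deprecated alias that emits a warning, which is what this branch silences. A minimal sketch of the symptom, assuming w3lib >= 1.8.0 is installed:

    import warnings
    from w3lib.html import remove_entities  # deprecated alias since w3lib 1.8.0

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        remove_entities(u'&amp;')  # still works, but delegates and warns
        print(caught[0].category.__name__)  # expected: a DeprecationWarning
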
debian/control (2 changes: 1 addition & 1 deletion)

@@ -9,7 +9,7 @@ Homepage: http://scrapy.org/
 Package: scrapy-SUFFIX
 Architecture: all
 Depends: ${python:Depends}, python-lxml, python-twisted, python-openssl,
-  python-w3lib (>= 1.2), python-queuelib, python-cssselect (>= 0.9), python-six (>=1.5.2)
+  python-w3lib (>= 1.8.0), python-queuelib, python-cssselect (>= 0.9), python-six (>=1.5.2)
 Recommends: python-setuptools
 Conflicts: python-scrapy, scrapy, scrapy-0.11
 Provides: python-scrapy, scrapy

requirements.txt (2 changes: 1 addition & 1 deletion)

@@ -2,6 +2,6 @@ Twisted>=10.0.0
 lxml
 pyOpenSSL
 cssselect>=0.9
-w3lib>=1.2
+w3lib>=1.8.0
 queuelib
 six>=1.5.2

scrapy/contrib/downloadermiddleware/ajaxcrawl.py (2 changes: 1 addition & 1 deletion)

@@ -84,6 +84,6 @@ def _has_ajaxcrawlable_meta(text):
 
     text = _script_re.sub(u'', text)
     text = _noscript_re.sub(u'', text)
-    text = html.remove_comments(html.remove_entities(text))
+    text = html.remove_comments(html.replace_entities(text))
     return _ajax_crawlable_re.search(text) is not None

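The updated line decodes HTML entities before stripping comments. A short sketch of the renamed helper, with outputs assumed from w3lib's documented behavior:

    from w3lib.html import remove_comments, replace_entities

    # Entities are decoded into characters, not removed.
    replace_entities(u'Price: &pound;100 &amp; up')
    # -> u'Price: \xa3100 & up'

    # Composed as in _has_ajaxcrawlable_meta above.
    remove_comments(replace_entities(u'<!-- noise -->&lt;meta&gt;'))
    # -> u'<meta>'
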
scrapy/contrib/linkextractors/regex.py (4 changes: 2 additions & 2 deletions)

@@ -1,7 +1,7 @@
 import re
 from six.moves.urllib.parse import urljoin
 
-from w3lib.html import remove_tags, remove_entities, replace_escape_chars
+from w3lib.html import remove_tags, replace_entities, replace_escape_chars
 
 from scrapy.link import Link
 from .sgml import SgmlLinkExtractor

@@ -21,7 +21,7 @@ def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
         if base_url is None:
             base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
 
-        clean_url = lambda u: urljoin(base_url, remove_entities(clean_link(u.decode(response_encoding))))
+        clean_url = lambda u: urljoin(base_url, replace_entities(clean_link(u.decode(response_encoding))))
         clean_text = lambda t: replace_escape_chars(remove_tags(t.decode(response_encoding))).strip()
 
         links_text = linkre.findall(response_text)

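For illustration, what the updated clean_url lambda does to a raw entity-encoded href; the base URL and encoding are invented for this example, and clean_link (defined elsewhere in the module) is omitted:

    from six.moves.urllib.parse import urljoin
    from w3lib.html import replace_entities

    base_url = 'http://example.com/a/'
    raw_href = b'page?x=1&amp;y=2'  # bytes, as matched in the raw response body

    # Decode the bytes, decode HTML entities, then resolve against the base URL.
    urljoin(base_url, replace_entities(raw_href.decode('utf-8')))
    # -> 'http://example.com/a/page?x=1&y=2'
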
scrapy/utils/misc.py (6 changes: 3 additions & 3 deletions)

@@ -5,7 +5,7 @@
 from pkgutil import iter_modules
 
 import six
-from w3lib.html import remove_entities
+from w3lib.html import replace_entities
 
 from scrapy.utils.python import flatten
 from scrapy.item import BaseItem

@@ -94,9 +94,9 @@ def extract_regex(regex, text, encoding='utf-8'):
     strings = flatten(strings)
 
     if isinstance(text, unicode):
-        return [remove_entities(s, keep=['lt', 'amp']) for s in strings]
+        return [replace_entities(s, keep=['lt', 'amp']) for s in strings]
     else:
-        return [remove_entities(unicode(s, encoding), keep=['lt', 'amp']) for s in strings]
+        return [replace_entities(unicode(s, encoding), keep=['lt', 'amp']) for s in strings]
 
 
 def md5sum(file):

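The keep argument leaves the listed entities encoded, so &lt; and &amp; survive for later markup handling while everything else is decoded. A small sketch, output assumed from w3lib's documented behavior:

    from w3lib.html import replace_entities

    replace_entities(u'a &lt; b &amp; c &copy; 2014', keep=['lt', 'amp'])
    # -> u'a &lt; b &amp; c \xa9 2014'
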
setup.py (2 changes: 1 addition & 1 deletion)

@@ -124,7 +124,7 @@ def is_not_module(filename):
 else:
     setup_args['install_requires'] = [
         'Twisted>=10.0.0',
-        'w3lib>=1.2',
+        'w3lib>=1.8.0',
         'queuelib',
         'lxml',
         'pyOpenSSL',

tox.ini (2 changes: 1 addition & 1 deletion)

@@ -46,7 +46,7 @@ deps =
     pyOpenSSL>=0.13.1
     cssselect>=0.9
     queuelib>=1.1.1
-    w3lib>=1.5
+    w3lib>=1.8.0
     Pillow
     # tests requirements
     mock

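With debian/control, requirements.txt, setup.py, and tox.ini all agreeing on the same floor, a quick sketch for confirming an installed environment meets it, using pkg_resources from setuptools:

    import pkg_resources

    # Raises VersionConflict or DistributionNotFound when the pin is not met.
    pkg_resources.require('w3lib>=1.8.0')
    print(pkg_resources.get_distribution('w3lib').version)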
