Commit 9fb7b36: Merge branch 'w3lib-warnings'

dangra committed Aug 4, 2014
2 parents: 511a269 + 3b64b24

Showing 7 changed files with 10 additions and 10 deletions.
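
Background for the diffs below: w3lib 1.8.0 renamed remove_entities to replace_entities (the helper decodes entities into characters rather than stripping them, so the new name matches the behavior) and kept the old name as a deprecated alias that emits a warning, which is what this branch silences. A minimal sketch of the symptom, assuming w3lib >= 1.8.0 is installed:

    import warnings
    from w3lib.html import remove_entities  # deprecated alias since w3lib 1.8.0

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        remove_entities(u'&amp;')  # still works, but delegates and warns
        print(caught[0].category.__name__)  # expected: a DeprecationWarning
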
debian/control (2 changes: 1 addition & 1 deletion)

@@ -9,7 +9,7 @@ Homepage: http://scrapy.org/
 Package: scrapy-SUFFIX
 Architecture: all
 Depends: ${python:Depends}, python-lxml, python-twisted, python-openssl,
-  python-w3lib (>= 1.2), python-queuelib, python-cssselect (>= 0.9), python-six (>=1.5.2)
+  python-w3lib (>= 1.8.0), python-queuelib, python-cssselect (>= 0.9), python-six (>=1.5.2)
 Recommends: python-setuptools
 Conflicts: python-scrapy, scrapy, scrapy-0.11
 Provides: python-scrapy, scrapy

requirements.txt (2 changes: 1 addition & 1 deletion)

@@ -2,6 +2,6 @@ Twisted>=10.0.0
 lxml
 pyOpenSSL
 cssselect>=0.9
-w3lib>=1.2
+w3lib>=1.8.0
 queuelib
 six>=1.5.2

scrapy/contrib/downloadermiddleware/ajaxcrawl.py (2 changes: 1 addition & 1 deletion)

@@ -84,6 +84,6 @@ def _has_ajaxcrawlable_meta(text):
 
     text = _script_re.sub(u'', text)
     text = _noscript_re.sub(u'', text)
-    text = html.remove_comments(html.remove_entities(text))
+    text = html.remove_comments(html.replace_entities(text))
     return _ajax_crawlable_re.search(text) is not None

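The updated line decodes HTML entities before stripping comments. A short sketch of the renamed helper, with outputs assumed from w3lib's documented behavior:

    from w3lib.html import remove_comments, replace_entities

    # Entities are decoded into characters, not removed.
    replace_entities(u'Price: &pound;100 &amp; up')
    # -> u'Price: \xa3100 & up'

    # Composed as in _has_ajaxcrawlable_meta above.
    remove_comments(replace_entities(u'<!-- noise -->&lt;meta&gt;'))
    # -> u'<meta>'
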
scrapy/contrib/linkextractors/regex.py (4 changes: 2 additions & 2 deletions)

@@ -1,7 +1,7 @@
 import re
 from six.moves.urllib.parse import urljoin
 
-from w3lib.html import remove_tags, remove_entities, replace_escape_chars
+from w3lib.html import remove_tags, replace_entities, replace_escape_chars
 
 from scrapy.link import Link
 from .sgml import SgmlLinkExtractor

@@ -21,7 +21,7 @@ def _extract_links(self, response_text, response_url, response_encoding, base_url=None):
         if base_url is None:
             base_url = urljoin(response_url, self.base_url) if self.base_url else response_url
 
-        clean_url = lambda u: urljoin(base_url, remove_entities(clean_link(u.decode(response_encoding))))
+        clean_url = lambda u: urljoin(base_url, replace_entities(clean_link(u.decode(response_encoding))))
         clean_text = lambda t: replace_escape_chars(remove_tags(t.decode(response_encoding))).strip()
 
         links_text = linkre.findall(response_text)

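For illustration, what the updated clean_url lambda does to a raw entity-encoded href; the base URL and encoding are invented for this example, and clean_link (defined elsewhere in the module) is omitted:

    from six.moves.urllib.parse import urljoin
    from w3lib.html import replace_entities

    base_url = 'http://example.com/a/'
    raw_href = b'page?x=1&amp;y=2'  # bytes, as matched in the raw response body

    # Decode the bytes, decode HTML entities, then resolve against the base URL.
    urljoin(base_url, replace_entities(raw_href.decode('utf-8')))
    # -> 'http://example.com/a/page?x=1&y=2'
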
scrapy/utils/misc.py (6 changes: 3 additions & 3 deletions)

@@ -5,7 +5,7 @@
 from pkgutil import iter_modules
 
 import six
-from w3lib.html import remove_entities
+from w3lib.html import replace_entities
 
 from scrapy.utils.python import flatten
 from scrapy.item import BaseItem

@@ -94,9 +94,9 @@ def extract_regex(regex, text, encoding='utf-8'):
     strings = flatten(strings)
 
     if isinstance(text, unicode):
-        return [remove_entities(s, keep=['lt', 'amp']) for s in strings]
+        return [replace_entities(s, keep=['lt', 'amp']) for s in strings]
     else:
-        return [remove_entities(unicode(s, encoding), keep=['lt', 'amp']) for s in strings]
+        return [replace_entities(unicode(s, encoding), keep=['lt', 'amp']) for s in strings]
 
 
 def md5sum(file):

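The keep argument leaves the listed entities encoded, so &lt; and &amp; survive for later markup handling while everything else is decoded. A small sketch, output assumed from w3lib's documented behavior:

    from w3lib.html import replace_entities

    replace_entities(u'a &lt; b &amp; c &copy; 2014', keep=['lt', 'amp'])
    # -> u'a &lt; b &amp; c \xa9 2014'
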
setup.py (2 changes: 1 addition & 1 deletion)

@@ -124,7 +124,7 @@ def is_not_module(filename):
 else:
     setup_args['install_requires'] = [
         'Twisted>=10.0.0',
-        'w3lib>=1.2',
+        'w3lib>=1.8.0',
         'queuelib',
         'lxml',
         'pyOpenSSL',

tox.ini (2 changes: 1 addition & 1 deletion)

@@ -46,7 +46,7 @@ deps =
     pyOpenSSL>=0.13.1
     cssselect>=0.9
     queuelib>=1.1.1
-    w3lib>=1.5
+    w3lib>=1.8.0
     Pillow
     # tests requirements
     mock

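With debian/control, requirements.txt, setup.py, and tox.ini all agreeing on the same floor, a quick sketch for confirming an installed environment meets it, using pkg_resources from setuptools:

    import pkg_resources

    # Raises VersionConflict or DistributionNotFound when the pin is not met.
    pkg_resources.require('w3lib>=1.8.0')
    print(pkg_resources.get_distribution('w3lib').version)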
