Skip to content

Commit

Permalink
Deprecate scrapy.utils.python.FixedSGMLParser
Browse files Browse the repository at this point in the history
The SGML link extractor that uses it is also going to be deprecated;
the sgmllib module is not available in Python 3.
  • Loading branch information
kmike committed Jul 2, 2014
1 parent e87dc37 commit 2b74966
Showing 1 changed file with 30 additions and 13 deletions.
43 changes: 30 additions & 13 deletions scrapy/utils/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,40 @@
import inspect
import weakref
import errno
import warnings
from functools import partial, wraps
from sgmllib import SGMLParser

import six

class FixedSGMLParser(SGMLParser):
"""SGMLParser subclass that overrides convert_charref().

The SGMLParser that comes with Python has a bug in the convert_charref()
method. This is the same class with the bug fixed: the override only
converts numeric character references in the range 0-127, because ASCII
ends at 127, not 255.
"""
from scrapy.exceptions import ScrapyDeprecationWarning

def convert_charref(self, name):
    """Convert a numeric character reference, limited to the ASCII range.

    This method fixes a bug in Python's SGMLParser.

    `name` is the textual reference number. Returns the result of
    ``self.convert_codepoint()`` for values 0-127; returns None when
    `name` is not an integer literal or lies outside that range.
    """
    try:
        codepoint = int(name)
    except ValueError:
        # Not a decimal number: leave the reference unconverted.
        return None
    if 0 <= codepoint <= 127:  # ASCII ends at 127, not 255
        return self.convert_codepoint(codepoint)
    return None

if six.PY2:
    from sgmllib import SGMLParser

    class FixedSGMLParser(SGMLParser):
        """SGMLParser with a corrected ``convert_charref()`` method.

        The SGMLParser that comes with Python has a bug in the
        convert_charref() method. This is the same class with the bug fixed.

        The class is only defined when ``six.PY2`` is true, because it
        depends on the ``sgmllib`` import above.

        Warning: this class is deprecated and will be removed in future
        releases.
        """

        def __init__(self, *args, **kwargs):
            """Emit a deprecation warning, then initialise the base parser.

            All positional and keyword arguments are forwarded unchanged to
            ``SGMLParser.__init__``.
            """
            warnings.warn(
                "FixedSGMLParser is deprecated and will be removed in future releases.",
                ScrapyDeprecationWarning,
                # Attribute the warning to the code that instantiates the
                # parser rather than to this __init__ line.
                stacklevel=2,
            )
            SGMLParser.__init__(self, *args, **kwargs)

        def convert_charref(self, name):
            """This method fixes a bug in Python's SGMLParser.

            `name` is the textual reference number. Returns the result of
            ``self.convert_codepoint()`` for values 0-127; returns None when
            `name` is not an integer literal or lies outside that range.
            """
            try:
                n = int(name)
            except ValueError:
                return
            if not 0 <= n <= 127:  # ASCII ends at 127, not 255
                return
            return self.convert_codepoint(n)


def flatten(x):
Expand Down

0 comments on commit 2b74966

Please sign in to comment.