@@ -109,12 +109,13 @@ def get_events(self):
109109
110110class TestCaseBase (unittest .TestCase ):
111111
112- def get_collector (self ):
113- return EventCollector (convert_charrefs = False )
112+ def get_collector (self , convert_charrefs = False ):
113+ return EventCollector (convert_charrefs = convert_charrefs )
114114
115- def _run_check (self , source , expected_events , collector = None ):
115+ def _run_check (self , source , expected_events ,
116+ * , collector = None , convert_charrefs = False ):
116117 if collector is None :
117- collector = self .get_collector ()
118+ collector = self .get_collector (convert_charrefs = convert_charrefs )
118119 parser = collector
119120 for s in source :
120121 parser .feed (s )
@@ -128,7 +129,7 @@ def _run_check(self, source, expected_events, collector=None):
128129
129130 def _run_check_extra (self , source , events ):
130131 self ._run_check (source , events ,
131- EventCollectorExtra (convert_charrefs = False ))
132+ collector = EventCollectorExtra (convert_charrefs = False ))
132133
133134
134135class HTMLParserTestCase (TestCaseBase ):
@@ -187,10 +188,86 @@ def test_malformatted_charref(self):
187188 ])
188189
189190 def test_unclosed_entityref (self ):
190- self ._run_check ("&entityref foo" , [
191- ("entityref" , "entityref" ),
192- ("data" , " foo" ),
193- ])
191+ self ._run_check ('&gt z' , [('entityref' , 'gt' ), ('data' , ' z' )],
192+ convert_charrefs = False )
193+ self ._run_check ('&gt z' , [('data' , '> z' )], convert_charrefs = True )
194+
195+ self ._run_check ('&undefined z' ,
196+ [('entityref' , 'undefined' ), ('data' , ' z' )],
197+ convert_charrefs = False )
198+ self ._run_check ('&undefined z' , [('data' , '&undefined z' )],
199+ convert_charrefs = True )
200+
201+ self ._run_check ('&gtundefined z' ,
202+ [('entityref' , 'gtundefined' ), ('data' , ' z' )],
203+ convert_charrefs = False )
204+ self ._run_check ('&gtundefined z' , [('data' , '>undefined z' )],
205+ convert_charrefs = True )
206+
207+ self ._run_check ('& z' , [('data' , '& z' )], convert_charrefs = False )
208+ self ._run_check ('& z' , [('data' , '& z' )], convert_charrefs = True )
209+
210+ def test_eof_in_entityref (self ):
211+ self ._run_check ('&gt' , [('entityref' , 'gt' )], convert_charrefs = False )
212+ self ._run_check ('&gt' , [('data' , '>' )], convert_charrefs = True )
213+
214+ self ._run_check ('&g' , [('entityref' , 'g' )], convert_charrefs = False )
215+ self ._run_check ('&g' , [('data' , '&g' )], convert_charrefs = True )
216+
217+ self ._run_check ('&undefined' , [('entityref' , 'undefined' )],
218+ convert_charrefs = False )
219+ self ._run_check ('&undefined' , [('data' , '&undefined' )],
220+ convert_charrefs = True )
221+
222+ self ._run_check ('&gtundefined' , [('entityref' , 'gtundefined' )],
223+ convert_charrefs = False )
224+ self ._run_check ('&gtundefined' , [('data' , '>undefined' )],
225+ convert_charrefs = True )
226+
227+ self ._run_check ('&' , [('data' , '&' )], convert_charrefs = False )
228+ self ._run_check ('&' , [('data' , '&' )], convert_charrefs = True )
229+
230+ def test_unclosed_charref (self ):
231+ self ._run_check ('&#123 z' , [('charref' , '123' ), ('data' , ' z' )],
232+ convert_charrefs = False )
233+ self ._run_check ('&#123 z' , [('data' , '{ z' )], convert_charrefs = True )
234+ self ._run_check ('&#xab z' , [('charref' , 'xab' ), ('data' , ' z' )],
235+ convert_charrefs = False )
236+ self ._run_check ('&#xab z' , [('data' , '\xab z' )], convert_charrefs = True )
237+
238+ self ._run_check ('&#123456789 z' ,
239+ [('charref' , '123456789' ), ('data' , ' z' )],
240+ convert_charrefs = False )
241+ self ._run_check ('&#123456789 z' , [('data' , '\ufffd z' )],
242+ convert_charrefs = True )
243+ self ._run_check ('&#x123456789 z' ,
244+ [('charref' , 'x123456789' ), ('data' , ' z' )],
245+ convert_charrefs = False )
246+ self ._run_check ('&#x123456789 z' , [('data' , '\ufffd z' )],
247+ convert_charrefs = True )
248+
249+ self ._run_check ('&# z' , [('data' , '&# z' )], convert_charrefs = False )
250+ self ._run_check ('&# z' , [('data' , '&# z' )], convert_charrefs = True )
251+ self ._run_check ('&#x z' , [('data' , '&#x z' )], convert_charrefs = False )
252+ self ._run_check ('&#x z' , [('data' , '&#x z' )], convert_charrefs = True )
253+
254+ def test_eof_in_charref (self ):
255+ self ._run_check ('&#123' , [('charref' , '123' )], convert_charrefs = False )
256+ self ._run_check ('&#123' , [('data' , '{' )], convert_charrefs = True )
257+ self ._run_check ('&#xab' , [('charref' , 'xab' )], convert_charrefs = False )
258+ self ._run_check ('&#xab' , [('data' , '\xab' )], convert_charrefs = True )
259+
260+ self ._run_check ('&#123456789' , [('charref' , '123456789' )],
261+ convert_charrefs = False )
262+ self ._run_check ('&#123456789' , [('data' , '\ufffd' )], convert_charrefs = True )
263+ self ._run_check ('&#x123456789' , [('charref' , 'x123456789' )],
264+ convert_charrefs = False )
265+ self ._run_check ('&#x123456789' , [('data' , '\ufffd' )], convert_charrefs = True )
266+
267+ self ._run_check ('&#' , [('data' , '&#' )], convert_charrefs = False )
268+ self ._run_check ('&#' , [('data' , '&#' )], convert_charrefs = True )
269+ self ._run_check ('&#x' , [('data' , '&#x' )], convert_charrefs = False )
270+ self ._run_check ('&#x' , [('data' , '&#x' )], convert_charrefs = True )
194271
195272 def test_bad_nesting (self ):
196273 # Strangely, this *is* supposed to test that overlapping
@@ -762,20 +839,6 @@ def test_correct_detection_of_start_tags(self):
762839 ]
763840 self ._run_check (html , expected )
764841
765- def test_EOF_in_charref (self ):
766- # see #17802
767- # This test checks that the UnboundLocalError reported in the issue
768- # is not raised, however I'm not sure the returned values are correct.
769- # Maybe HTMLParser should use self.unescape for these
770- data = [
771- ('a&' , [('data' , 'a&' )]),
772- ('a&b' , [('data' , 'ab' )]),
773- ('a&b ' , [('data' , 'a' ), ('entityref' , 'b' ), ('data' , ' ' )]),
774- ('a&b;' , [('data' , 'a' ), ('entityref' , 'b' )]),
775- ]
776- for html , expected in data :
777- self ._run_check (html , expected )
778-
779842 def test_eof_in_comments (self ):
780843 data = [
781844 ('<!--' , [('comment' , '' )]),
0 commit comments