@@ -109,12 +109,13 @@ def get_events(self):
109109
110110class TestCaseBase (unittest .TestCase ):
111111
112- def get_collector (self ):
113- return EventCollector (convert_charrefs = False )
112+ def get_collector (self , convert_charrefs = False ):
113+ return EventCollector (convert_charrefs = convert_charrefs )
114114
115- def _run_check (self , source , expected_events , collector = None ):
115+ def _run_check (self , source , expected_events ,
116+ * , collector = None , convert_charrefs = False ):
116117 if collector is None :
117- collector = self .get_collector ()
118+ collector = self .get_collector (convert_charrefs = convert_charrefs )
118119 parser = collector
119120 for s in source :
120121 parser .feed (s )
@@ -128,7 +129,7 @@ def _run_check(self, source, expected_events, collector=None):
128129
129130 def _run_check_extra (self , source , events ):
130131 self ._run_check (source , events ,
131- EventCollectorExtra (convert_charrefs = False ))
132+ collector = EventCollectorExtra (convert_charrefs = False ))
132133
133134
134135class HTMLParserTestCase (TestCaseBase ):
@@ -187,10 +188,75 @@ def test_malformatted_charref(self):
187188 ])
188189
189190 def test_unclosed_entityref (self ):
190- self ._run_check ("&entityref foo" , [
191- ("entityref" , "entityref" ),
192- ("data" , " foo" ),
193- ])
191+ self ._run_check ('&gt z' , [('entityref' , 'gt' ), ('data' , ' z' )],
192+ convert_charrefs = False )
193+ self ._run_check ('&gt z' , [('data' , '> z' )], convert_charrefs = True )
194+
195+ self ._run_check ('&undefined z' ,
196+ [('entityref' , 'undefined' ), ('data' , ' z' )],
197+ convert_charrefs = False )
198+ self ._run_check ('&undefined z' , [('data' , '&undefined z' )],
199+ convert_charrefs = True )
200+
201+ self ._run_check ('& z' , [('data' , '& z' )], convert_charrefs = False )
202+ self ._run_check ('& z' , [('data' , '& z' )], convert_charrefs = True )
203+
204+ def test_eof_in_entityref (self ):
205+ self ._run_check ('&gt' , [('entityref' , 'gt' )], convert_charrefs = False )
206+ self ._run_check ('&gt' , [('data' , '>' )], convert_charrefs = True )
207+
208+ self ._run_check ('&g' , [('entityref' , 'g' )], convert_charrefs = False )
209+ self ._run_check ('&g' , [('data' , '&g' )], convert_charrefs = True )
210+
211+ self ._run_check ('&undefined' , [('entityref' , 'undefined' )],
212+ convert_charrefs = False )
213+ self ._run_check ('&undefined' , [('data' , '&undefined' )],
214+ convert_charrefs = True )
215+
216+ self ._run_check ('&' , [('data' , '&' )], convert_charrefs = False )
217+ self ._run_check ('&' , [('data' , '&' )], convert_charrefs = True )
218+
219+ def test_unclosed_charref (self ):
220+ self ._run_check ('&#123 z' , [('charref' , '123' ), ('data' , ' z' )],
221+ convert_charrefs = False )
222+ self ._run_check ('&#123 z' , [('data' , '{ z' )], convert_charrefs = True )
223+ self ._run_check ('&#xab z' , [('charref' , 'xab' ), ('data' , ' z' )],
224+ convert_charrefs = False )
225+ self ._run_check ('&#xab z' , [('data' , '\xab z' )], convert_charrefs = True )
226+
227+ self ._run_check ('&#123456789 z' ,
228+ [('charref' , '123456789' ), ('data' , ' z' )],
229+ convert_charrefs = False )
230+ self ._run_check ('&#123456789 z' , [('data' , '\ufffd z' )],
231+ convert_charrefs = True )
232+ self ._run_check ('&#x123456789 z' ,
233+ [('charref' , 'x123456789' ), ('data' , ' z' )],
234+ convert_charrefs = False )
235+ self ._run_check ('&#x123456789 z' , [('data' , '\ufffd z' )],
236+ convert_charrefs = True )
237+
238+ self ._run_check ('&# z' , [('data' , '&# z' )], convert_charrefs = False )
239+ self ._run_check ('&# z' , [('data' , '&# z' )], convert_charrefs = True )
240+ self ._run_check ('&#x z' , [('data' , '&#x z' )], convert_charrefs = False )
241+ self ._run_check ('&#x z' , [('data' , '&#x z' )], convert_charrefs = True )
242+
243+ def test_eof_in_charref (self ):
244+ self ._run_check ('&#123' , [('charref' , '123' )], convert_charrefs = False )
245+ self ._run_check ('&#123' , [('data' , '{' )], convert_charrefs = True )
246+ self ._run_check ('&#xab' , [('charref' , 'xab' )], convert_charrefs = False )
247+ self ._run_check ('&#xab' , [('data' , '\xab ' )], convert_charrefs = True )
248+
249+ self ._run_check ('&#123456789' , [('charref' , '123456789' )],
250+ convert_charrefs = False )
251+ self ._run_check ('&#123456789' , [('data' , '\ufffd ' )], convert_charrefs = True )
252+ self ._run_check ('&#x123456789' , [('charref' , 'x123456789' )],
253+ convert_charrefs = False )
254+ self ._run_check ('&#x123456789' , [('data' , '\ufffd ' )], convert_charrefs = True )
255+
256+ self ._run_check ('&#' , [('data' , '&#' )], convert_charrefs = False )
257+ self ._run_check ('&#' , [('data' , '&#' )], convert_charrefs = True )
258+ self ._run_check ('&#x' , [('data' , '&#x' )], convert_charrefs = False )
259+ self ._run_check ('&#x' , [('data' , '&#x' )], convert_charrefs = True )
194260
195261 def test_bad_nesting (self ):
196262 # Strangely, this *is* supposed to test that overlapping
@@ -762,20 +828,6 @@ def test_correct_detection_of_start_tags(self):
762828 ]
763829 self ._run_check (html , expected )
764830
765- def test_EOF_in_charref (self ):
766- # see #17802
767- # This test checks that the UnboundLocalError reported in the issue
768- # is not raised, however I'm not sure the returned values are correct.
769- # Maybe HTMLParser should use self.unescape for these
770- data = [
771- ('a&' , [('data' , 'a&' )]),
772- ('a&b' , [('data' , 'ab' )]),
773- ('a&b ' , [('data' , 'a' ), ('entityref' , 'b' ), ('data' , ' ' )]),
774- ('a&b;' , [('data' , 'a' ), ('entityref' , 'b' )]),
775- ]
776- for html , expected in data :
777- self ._run_check (html , expected )
778-
779831 def test_eof_in_comments (self ):
780832 data = [
781833 ('<!--' , [('comment' , '' )]),
0 commit comments