1- from bs4 import BeautifulSoup , NavigableString , Comment , Doctype
1+ from bs4 import BeautifulSoup , NavigableString , Tag , Comment , Doctype
22from textwrap import fill
33import re
44import six
@@ -123,9 +123,9 @@ def convert(self, html):
123123 return self .convert_soup (soup )
124124
125125 def convert_soup (self , soup ):
126- return self .process_tag (soup , convert_as_inline = False , children_only = True )
126+ return self .process_tag (soup , convert_as_inline = False )
127127
128- def process_tag (self , node , convert_as_inline , children_only = False ):
128+ def process_tag (self , node , convert_as_inline ):
129129 text = ''
130130
131131 # markdown headings or cells can't include
@@ -134,7 +134,7 @@ def process_tag(self, node, convert_as_inline, children_only=False):
134134 isCell = node .name in ['td' , 'th' ]
135135 convert_children_as_inline = convert_as_inline
136136
137- if not children_only and ( isHeading or isCell ) :
137+ if isHeading or isCell :
138138 convert_children_as_inline = True
139139
140140 # Remove whitespace-only textnodes just before, after or
@@ -170,10 +170,11 @@ def process_tag(self, node, convert_as_inline, children_only=False):
170170 newlines = '\n ' * max (newlines_left , newlines_right )
171171 text = text_strip + newlines + next_text_strip
172172
173- if not children_only :
174- convert_fn = getattr (self , 'convert_%s' % node .name , None )
175- if convert_fn and self .should_convert_tag (node .name ):
176- text = convert_fn (node , text , convert_as_inline )
173+ # apply this tag's final conversion function
174+ # (BeautifulSoup objects have a name of "[document]" and will not apply one)
175+ convert_fn = getattr (self , 'convert_%s' % node .name , None )
176+ if convert_fn and self .should_convert_tag (node .name ):
177+ text = convert_fn (node , text , convert_as_inline )
177178
178179 return text
179180
0 commit comments