We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e17023e commit bdc9983 (Copy full SHA for bdc9983)
cardinal_pythonlib/extract_text.py
@@ -1140,6 +1140,12 @@ def convert_html_to_text(
1140
"""
1141
Converts HTML to text.
1142
1143
+
1144
+ # beautifulsoup4==4.13.4 returns "b''" for an empty bytes array
1145
+ # So we just work around this here:
1146
+ if bytes is not None and len(blob) == 0:
1147
+ return ""
1148
1149
with get_filelikeobject(filename, blob) as fp:
1150
soup = bs4.BeautifulSoup(fp, "html.parser")
1151
return soup.get_text()
0 commit comments