Skip to content
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## 0.18.15-dev1

### Enhancements
- Optimized the runtime of `ElementHtml._get_children_html`
- Speed up function `ElementHtml._get_children_html` by 234% (codeflash)
- Speed up function `group_broken_paragraphs` by 30% (codeflash)

### Features
Expand Down
7 changes: 5 additions & 2 deletions unstructured/partition/html/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,15 @@ def _get_children_html(self, soup: BeautifulSoup, element_html: Tag, **kwargs: A
wrapper = soup.new_tag(name="div")
wrapper.append(element_html)
for child in self.children:
child_html = child.get_html_element(**kwargs)
child_html = child.get_html_element(_soup=soup, **kwargs)
wrapper.append(child_html)
return wrapper

def get_html_element(self, **kwargs: Any) -> Tag:
soup = BeautifulSoup("", HTML_PARSER)
soup: Optional[BeautifulSoup] = kwargs.pop("_soup", None)
if soup is None:
soup = BeautifulSoup("", HTML_PARSER)

element_html = self.get_text_as_html()
if element_html is None:
element_html = soup.new_tag(name=self.html_tag)
Expand Down