Skip to content

Commit a4e1a68

Browse files
committed
Add flag to support returning tables in HTML when rendering to markdown
Improves results on olmo bench by about 2%.
1 parent eabb23c commit a4e1a68

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

marker/renderers/markdown.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,15 @@ def __init__(
6262
page_separator,
6363
inline_math_delimiters,
6464
block_math_delimiters,
65+
html_tables_in_markdown,
6566
**kwargs,
6667
):
6768
super().__init__(**kwargs)
6869
self.paginate_output = paginate_output
6970
self.page_separator = page_separator
7071
self.inline_math_delimiters = inline_math_delimiters
7172
self.block_math_delimiters = block_math_delimiters
73+
self.html_tables_in_markdown = html_tables_in_markdown
7274

7375
def convert_div(self, el, text, parent_tags):
7476
is_page = el.has_attr("class") and el["class"][0] == "page"
@@ -116,6 +118,9 @@ def convert_math(self, el, text, parent_tags):
116118
)
117119

118120
def convert_table(self, el, text, parent_tags):
121+
if self.html_tables_in_markdown:
122+
return "\n\n" + str(el) + "\n\n"
123+
119124
total_rows = len(el.find_all("tr"))
120125
colspans = []
121126
rowspan_cols = defaultdict(int)
@@ -268,6 +273,9 @@ class MarkdownRenderer(HTMLRenderer):
268273
block_math_delimiters: Annotated[
269274
Tuple[str], "The delimiters to use for block math."
270275
] = ("$$", "$$")
276+
html_tables_in_markdown: Annotated[
277+
bool, "Return tables formatted as HTML, instead of in markdown"
278+
] = False
271279

272280
@property
273281
def md_cls(self):
@@ -284,6 +292,7 @@ def md_cls(self):
284292
sup_symbol="<sup>",
285293
inline_math_delimiters=self.inline_math_delimiters,
286294
block_math_delimiters=self.block_math_delimiters,
295+
html_tables_in_markdown=self.html_tables_in_markdown
287296
)
288297

289298
def __call__(self, document: Document) -> MarkdownOutput:

0 commit comments

Comments
 (0)