Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX Fix line breaks in markdown tables #156

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ v0.3
- Add ``private`` as an optional argument to :meth:`.hub_utils.push` to
optionally set the visibility status of a repo when pushing to the hub.
:pr:`130` by `Adrin Jalali`_.
- Fix a bug that resulted in markdown tables being rendered incorrectly if
entries contained line breaks by `Benjamin Bossan`_.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR number missing here.


v0.2
----
Expand Down
29 changes: 22 additions & 7 deletions skops/card/_model_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ def wrap_as_details(text: str, folded: bool) -> str:
return f"<details>\n<summary> Click to expand </summary>\n\n{text}\n\n</details>"


def _clean_table(table: str) -> str:
# replace line breaks "\n" with html tag <br />, however, leave end-of-line
# line breaks (eol_lb) intact
eol_lb = "|\n"
placeholder = "$%!?" # arbitrary sting that never appears naturally
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as a friend once said, equivalent to banging my head against the keyboard lol

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's stupid :D If you have a better idea, please let me know

About the PR number: I fixed that. How do you add the correct number before you even created the PR? Do you just check the highest number so far and hope that until you actually open the PR, there hasn't been another PR in-between?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't, I just add that as a commit after I create the PR :D

table = (
table.replace(eol_lb, placeholder)
.replace("\n", "<br />")
.replace(placeholder, eol_lb)
)
return table


@dataclass
class PlotSection:
"""Adds a link to a figure to the model card"""
Expand All @@ -48,7 +61,7 @@ def __repr__(self) -> str:
class TableSection:
"""Adds a table to the model card"""

table: dict["str", list[Any]]
table: dict[str, list[Any]]
folded: bool = False

def __post_init__(self) -> None:
Expand Down Expand Up @@ -78,8 +91,8 @@ def format(self) -> str:
else:
headers = self.table.keys()

table = tabulate(
self.table, tablefmt="github", headers=headers, showindex=False
table = _clean_table(
tabulate(self.table, tablefmt="github", headers=headers, showindex=False)
)
return wrap_as_details(table, folded=self.folded)

Expand Down Expand Up @@ -467,10 +480,12 @@ def _extract_estimator_config(self) -> str:
Markdown table of hyperparameters.
"""
hyperparameter_dict = self.model.get_params(deep=True)
return tabulate(
list(hyperparameter_dict.items()),
headers=["Hyperparameter", "Value"],
tablefmt="github",
return _clean_table(
tabulate(
list(hyperparameter_dict.items()),
headers=["Hyperparameter", "Value"],
tablefmt="github",
)
)

@staticmethod
Expand Down
45 changes: 45 additions & 0 deletions skops/card/tests/test_card.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,19 @@ def test_hyperparameter_table(destination_path, model_card):
assert "fit_intercept" in model_card


def test_hyperparameter_table_with_line_break(destination_path):
# Hyperparameters can contain values with line breaks, "\n", in them. In
# that case, the markdown table is broken. Check that the hyperparameter
# table we create properly replaces the "\n" with "<br />".
class EstimatorWithLbInParams:
def get_params(self, deep=False):
return {"fit_intercept": True, "n_jobs": "line\nwith\nbreak"}

model_card = Card(EstimatorWithLbInParams())
model_card = model_card.render()
assert "| n_jobs | line<br />with<br />break |" in model_card


def test_plot_model(destination_path, model_card):
model_card = model_card.render()
assert "<style>" in model_card
Expand Down Expand Up @@ -415,3 +428,35 @@ def test_folded(self, table_dict, folded):
assert "<details>" in output
else:
assert "<details>" not in output

def test_line_break_in_entry(self, table_dict):
# Line breaks are not allowed inside markdown tables, so check that
# they're removed. We test 3 conditions here:

# 1. custom object with line breaks in repr
# 2. string with line break in the middle
# 3. string with line break at start, middle, and end

# Note that for the latter, tabulate will automatically strip the line
# breaks from the start and end.
class LineBreakInRepr:
"""Custom object whose repr has a line break"""

def __repr__(self) -> str:
return "obj\nwith lb"

table_dict["with break"] = [
LineBreakInRepr(),
"hi\nthere",
"""
entry with
line breaks
""",
]
section = TableSection(table=table_dict)
expected = """| split | score | with break |
|---------|---------|--------------|
| 1 | 4 | obj<br />with lb |
| 2 | 5 | hi<br />there |
| 3 | 6 | entry with<br />line breaks |"""
assert section.format() == expected