Skip to content

Google style support #34

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docstring_to_markdown/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .google import google_to_markdown, looks_like_google
from .rst import looks_like_rst, rst_to_markdown

# Version string of this package.
__version__ = "0.12"
Expand All @@ -10,4 +11,8 @@ class UnknownFormatError(Exception):
def convert(docstring: str) -> str:
    """Convert a docstring in a recognised format to Markdown.

    The reStructuredText detector is consulted first, then the Google style
    detector; the converter paired with the first detector that accepts the
    text produces the result.

    Args:
        docstring: Raw docstring text.

    Returns:
        The Markdown produced by the matching converter.

    Raises:
        UnknownFormatError: If no detector accepts the docstring.
    """
    # Order matters: the first detector that accepts the text wins.
    converters = (
        (looks_like_rst, rst_to_markdown),
        (looks_like_google, google_to_markdown),
    )

    for accepts, to_markdown in converters:
        if accepts(docstring):
            return to_markdown(docstring)

    raise UnknownFormatError()
171 changes: 171 additions & 0 deletions docstring_to_markdown/google.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import re
from textwrap import dedent
from typing import List

# Every section header recognised in Google style docstrings.
SECTION_HEADERS: List[str] = [
    "Args", "Returns", "Raises", "Yields",
    "Example", "Examples",
    "Attributes",
    "Note", "Todo",
]

# Sections that are kept as plain text instead of being parsed as a list of
# arguments/return values/etc.
PLAIN_TEXT_SECTIONS: List[str] = ["Examples", "Example", "Note", "Todo"]

# Regex pattern -> replacement pairs applied to the docstring before parsing.
ESCAPE_RULES = {
    # Keep magic methods and filenames like __init__.py from being rendered
    # as Markdown emphasis.
    r"__(?P<text>\S+)__": r"\_\_\g<text>\_\_",
}


class Section:
    """A named section of a Google style docstring, rendered as Markdown.

    Attributes:
        name: Section name as given by the caller; used as the heading text.
        content: Markdown body built from the raw section text.
    """

    def __init__(self, name: str, content: str) -> None:
        """Store the section name and convert ``content`` to Markdown.

        Args:
            name: Section name; compared against ``PLAIN_TEXT_SECTIONS`` to
                decide how the body is rendered.
            content: Raw, still indented lines found under the header.
        """
        self.name = name
        self.content = ""

        self._parse(content)

    def _parse(self, content: str) -> None:
        """Fill ``self.content`` with the Markdown form of ``content``.

        Plain text sections are only dedented. Every other section becomes a
        bullet list with one bullet per entry, where an entry is a line at
        the section's base indentation plus the more deeply indented lines
        that follow it.

        Args:
            content: Raw section text; trailing newlines are ignored.
        """
        # Remove the indentation shared by the whole section rather than a
        # fixed run of spaces: this works for any indent width (or tabs) and
        # never touches spaces in the middle of a line.
        content = dedent(content.rstrip("\n"))

        if self.name in PLAIN_TEXT_SECTIONS:
            self.content = content
            return

        rendered: List[str] = []
        for entry in self._split_entries(content):
            rendered.extend(self._format_entry(entry))

        self.content = "\n".join(rendered)

    @staticmethod
    def _split_entries(content: str) -> List[List[str]]:
        """Group dedented section lines into entries (head + continuations)."""
        entries: List[List[str]] = []

        for line in content.split("\n"):
            if not line.strip():
                # A blank line carries no entry; keeping it would render an
                # empty bullet.
                continue

            if entries and line[0].isspace():
                # Continuation from a multiline description
                entries[-1].append(line)
            else:
                # Entering new description part
                entries.append([line])

        return entries

    @staticmethod
    def _format_entry(entry: List[str]) -> List[str]:
        """Render one entry as a Markdown bullet plus aligned continuations."""
        head = entry[0].lstrip()
        continuation = [line.lstrip() for line in entry[1:]]

        arg, colon, description = head.partition(":")
        if not colon:
            # No "name: description" structure, emit the text as-is.
            return ["- {}".format(head)] + continuation

        description = description.lstrip()
        if not description and continuation:
            # The description starts on the line below the argument name;
            # pull it up next to the name.
            description = continuation.pop(0)

        if description:
            bullet = "- `{}`: {}".format(arg, description)
        else:
            # Nothing to describe: still terminate the bullet so that the
            # next entry starts on its own line.
            bullet = "- `{}`:".format(arg)

        # Width of the "- `arg`: " prefix, so continuation lines line up
        # with the start of the description.
        indentation = " " * (len(arg) + 6)

        return [bullet] + [indentation + line for line in continuation]

    def as_markdown(self) -> str:
        """Return the section as a level-4 Markdown heading followed by its body."""
        return "#### {}\n\n{}\n\n".format(self.name, self.content)


class GoogleDocstring:
    """A Google style docstring split into a description and its sections.

    Attributes:
        sections: Parsed sections, in the order they appear in the docstring.
        description: Text that precedes the first section header.
    """

    def __init__(self, docstring: str) -> None:
        """Parse ``docstring`` into a description and a list of sections.

        Args:
            docstring: Full docstring text.
        """
        self.sections: List[Section] = []
        self.description: str = ""

        self._parse(docstring)

    def _parse(self, docstring: str) -> None:
        """Populate ``self.description`` and ``self.sections``.

        Lines are collected into the description until the first header
        (as decided by ``is_section``) is met; from then on they are
        collected per section.

        Args:
            docstring: Full docstring text.
        """
        self.sections = []

        description_lines: List[str] = []
        section_lines: List[str] = []
        cur_section = ""

        for line in docstring.split("\n"):
            if is_section(line):
                # Entering new section
                if cur_section:
                    # Leaving previous section, save it and reset buffer
                    self.sections.append(
                        Section(cur_section, "".join(section_lines))
                    )
                    section_lines = []

                # Remember currently parsed section. Surrounding whitespace
                # is dropped so that an indented header still yields a clean
                # name (e.g. for the plain text section lookup).
                cur_section = line.strip().rstrip(":")
                continue

            if cur_section:
                section_lines.append(line + "\n")
            else:
                # No header seen yet: still in the description
                description_lines.append(line + "\n")

        self.description = "".join(description_lines)

        # Only flush a section that was actually opened; a docstring without
        # any header must not produce an empty, nameless section.
        if cur_section:
            self.sections.append(Section(cur_section, "".join(section_lines)))

    def as_markdown(self) -> str:
        """Return the description followed by every section as Markdown.

        Returns:
            The Markdown text, ending with exactly one newline.
        """
        pieces = [self.description]
        pieces.extend(section.as_markdown() for section in self.sections)

        return "".join(pieces).rstrip("\n") + "\n"  # Only keep one last newline


def is_section(line: str) -> bool:
    """Return whether ``line`` is a Google style section header.

    A header is a line that, ignoring surrounding whitespace, consists of
    one of ``SECTION_HEADERS`` followed by a single colon. Requiring the
    whole line to match keeps ordinary content such as
    ``a: see Note: below`` from being mistaken for a header.

    Args:
        line: A single docstring line without its line terminator.

    Returns:
        True if the line is a section header, False otherwise.
    """
    header = line.strip()

    return header.endswith(":") and header[:-1] in SECTION_HEADERS


def looks_like_google(value: str) -> bool:
    """Return whether ``value`` contains a Google style section header.

    A header is a line that, ignoring surrounding whitespace, consists of
    one of ``SECTION_HEADERS`` followed by a single colon. Matching whole
    lines avoids false positives on words that merely end with a header
    name (``Footnote:``) and also recognises a header on the last line or
    in text using ``\\r\\n`` line endings.

    Args:
        value: Docstring text to inspect.

    Returns:
        True if at least one line is a section header, False otherwise.
    """
    for line in value.splitlines():
        header = line.strip()
        if header.endswith(":") and header[:-1] in SECTION_HEADERS:
            return True

    return False


def google_to_markdown(text: str, extract_signature: bool = True) -> str:
    """Convert a Google style docstring to Markdown.

    Args:
        text: Docstring text in Google style.
        extract_signature: Accepted but not used by this function.

    Returns:
        The Markdown rendering of the docstring.
    """
    # Escape parts we don't want to render before any parsing happens
    escaped = text
    for pattern in ESCAPE_RULES:
        escaped = re.sub(pattern, ESCAPE_RULES[pattern], escaped)

    return GoogleDocstring(escaped).as_markdown()
135 changes: 135 additions & 0 deletions tests/test_google.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import pytest

from docstring_to_markdown.google import google_to_markdown, looks_like_google

# Fixtures: each Google style input is paired with the Markdown that
# google_to_markdown is expected to return for it.
# NOTE(review): the section bodies below show no leading indentation in this
# view; the "broken indentation" wording in MULTILINE_ARG_DESCRIPTION suggests
# the repository copy is indented - confirm before relying on these literals.

# Description followed by an argument list and a return value.
BASIC_EXAMPLE = """Do **something**.

Some more detailed description.

Args:
a: some arg
b: some arg

Returns:
Same *stuff*
"""

BASIC_EXAMPLE_MD = """Do **something**.

Some more detailed description.

#### Args

- `a`: some arg
- `b`: some arg

#### Returns

- Same *stuff*
"""

# Double underscores (as in __init__.py) are expected to come out escaped.
ESCAPE_MAGIC_METHOD = """Example.

Args:
a: see __init__.py
"""

ESCAPE_MAGIC_METHOD_MD = """Example.

#### Args

- `a`: see \\_\\_init\\_\\_.py
"""

# "Note" and "Example" are expected as plain text, not as bullet lists.
PLAIN_SECTION = """Example.

Args:
a: some arg

Note:
Do not use this.

Example:
Do it like this.
"""

PLAIN_SECTION_MD = """Example.

#### Args

- `a`: some arg

#### Note

Do not use this.

#### Example

Do it like this.
"""

# Argument descriptions that span several lines, including one whose
# description starts on the line after the argument name.
MULTILINE_ARG_DESCRIPTION = """Example.

Args:
a (str): This is a long description
spanning over several lines
also with broken indentation
b (str): Second arg
c (str):
On the next line
And also multiple lines
"""

MULTILINE_ARG_DESCRIPTION_MD = """Example.

#### Args

- `a (str)`: This is a long description
spanning over several lines
also with broken indentation
- `b (str)`: Second arg
- `c (str)`: On the next line
And also multiple lines
"""

# Case name -> {"google": input docstring, "md": expected Markdown}; the
# names are used as pytest ids by the parametrized tests.
GOOGLE_CASES = {
"basic example": {
"google": BASIC_EXAMPLE,
"md": BASIC_EXAMPLE_MD,
},
"escape magic method": {
"google": ESCAPE_MAGIC_METHOD,
"md": ESCAPE_MAGIC_METHOD_MD,
},
"plain section": {
"google": PLAIN_SECTION,
"md": PLAIN_SECTION_MD,
},
"multiline arg description": {
"google": MULTILINE_ARG_DESCRIPTION,
"md": MULTILINE_ARG_DESCRIPTION_MD,
},
}


@pytest.mark.parametrize(
    "google",
    [fixture["google"] for fixture in GOOGLE_CASES.values()],
    ids=list(GOOGLE_CASES),
)
def test_looks_like_google_recognises_google(google):
    """Every Google style fixture must be detected as Google style."""
    detected = looks_like_google(google)
    assert detected


def test_looks_like_google_ignores_plain_text():
    """Texts without a Google style section header must be rejected."""
    for text in ("This is plain text", "See Also\n--------\n"):
        assert not looks_like_google(text)


@pytest.mark.parametrize(
    "google,markdown",
    [(fixture["google"], fixture["md"]) for fixture in GOOGLE_CASES.values()],
    ids=list(GOOGLE_CASES),
)
def test_google_to_markdown(google, markdown):
    """Each fixture must convert to exactly its expected Markdown."""
    converted = google_to_markdown(google)
    assert converted == markdown