python-lsp · krassowski · Oct 11, 2023 · Oct 10, 2023 · Oct 11, 2023 · Oct 11, 2023
diff --git a/docstring_to_markdown/__init__.py b/docstring_to_markdown/__init__.py
@@ -1,3 +1,4 @@
+from .google import google_to_markdown, looks_like_google
 from .rst import looks_like_rst, rst_to_markdown
 
 __version__ = "0.12"
@@ -10,4 +11,8 @@ class UnknownFormatError(Exception):
 def convert(docstring: str) -> str:
+    """Convert ``docstring`` to Markdown using the first format that matches.
+
+    reStructuredText is tried first, then Google style.
+
+    Raises:
+        UnknownFormatError: If the docstring looks like neither format.
+    """
     if looks_like_rst(docstring):
         return rst_to_markdown(docstring)
+
+    if looks_like_google(docstring):
+        return google_to_markdown(docstring)
+
     raise UnknownFormatError()
diff --git a/docstring_to_markdown/google.py b/docstring_to_markdown/google.py
@@ -0,0 +1,171 @@
+import re
+from textwrap import dedent
+from typing import List
+
+# Section headers that is_section() and looks_like_google() search for.
+# Headers not listed here are not recognised as sections by this module.
+SECTION_HEADERS: List[str] = [
+    "Args",
+    "Returns",
+    "Raises",
+    "Yields",
+    "Example",
+    "Examples",
+    "Attributes",
+    "Note",
+    "Todo",
+]
+
+# Sections whose body Section._parse() keeps as dedented text instead of
+# turning it into a list of arguments/return values/etc
+PLAIN_TEXT_SECTIONS: List[str] = [
+    "Examples",
+    "Example",
+    "Note",
+    "Todo",
+]
+
+# Maps a regex pattern to its replacement; google_to_markdown() applies every
+# rule to the raw docstring before it is parsed.
+ESCAPE_RULES = {
+    # Avoid Markdown in magic methods or filenames like __init__.py
+    r"__(?P<text>\S+)__": r"\_\_\g<text>\_\_",
+}
+
+
+class Section:
+    """One titled section of a Google-style docstring, rendered as Markdown.
+
+    Sections named in ``PLAIN_TEXT_SECTIONS`` keep their body as dedented
+    text.  Every other section is rendered as a bullet list with one bullet
+    per entry; ``name: description`` entries get the name in backticks.
+    """
+
+    def __init__(self, name: str, content: str) -> None:
+        """Store the title and convert the raw body to Markdown.
+
+        Args:
+            name: Section title, used verbatim in the Markdown heading.
+            content: Raw text that followed the section header.
+        """
+        self.name = name
+        self.content = ""
+
+        self._parse(content)
+
+    def _parse(self, content: str) -> None:
+        """Fill ``self.content`` with the Markdown form of ``content``."""
+        content = content.rstrip("\n")
+
+        if self.name in PLAIN_TEXT_SECTIONS:
+            self.content = dedent(content)
+            return
+
+        rendered: List[str] = []
+        for part in self._split_parts(content):
+            rendered.extend(self._format_part(part))
+
+        self.content = "\n".join(rendered)
+
+    @staticmethod
+    def _split_parts(content: str) -> List[List[str]]:
+        """Group the body lines into entries (first line + continuations)."""
+        parts: List[List[str]] = []
+
+        for line in content.split("\n"):
+            if not line.strip():
+                # Blank separators would otherwise become empty bullets
+                continue
+
+            # Drop one level of indentation, but only at the start of the
+            # line so that runs of spaces inside the text are left alone
+            if line.startswith("    "):
+                line = line[4:]
+
+            if line.startswith(" ") and parts:
+                # Continuation from a multiline description
+                parts[-1].append(line)
+            else:
+                # Entering new description part
+                parts.append([line])
+
+        return parts
+
+    @staticmethod
+    def _format_part(part: List[str]) -> List[str]:
+        """Render one entry as the Markdown lines of a single bullet."""
+        head = part[0]
+        rest = [line.lstrip() for line in part[1:]]
+
+        if ":" not in head:
+            return ["- {}".format(head)] + rest
+
+        arg, _, description = head.partition(":")
+        description = description.lstrip()
+
+        if not description and rest:
+            # The description starts on the line after "arg:", so move that
+            # line up next to the argument name
+            description, rest = rest[0], rest[1:]
+
+        if description:
+            bullet = "- `{}`: {}".format(arg, description)
+        else:
+            # No description at all: still end the bullet on its own line
+            bullet = "- `{}`:".format(arg)
+
+        # Align continuation lines with the start of the description:
+        # "- `" and "`: " are three characters each
+        indentation = (len(arg) + 6) * " "
+
+        return [bullet] + [indentation + line for line in rest]
+
+    def as_markdown(self) -> str:
+        """Return the section as a level-4 heading followed by its content."""
+        return "#### {}\n\n{}\n\n".format(self.name, self.content)
+
+
+class GoogleDocstring:
+    """A Google-style docstring split into a description and its sections.
+
+    Attributes are filled by parsing the text passed to the constructor:
+    ``description`` holds everything before the first section header and
+    ``sections`` holds one ``Section`` per header, in order of appearance.
+    """
+
+    def __init__(self, docstring: str) -> None:
+        """Parse ``docstring`` into ``description`` and ``sections``."""
+        self.sections: List[Section] = []
+        self.description: str = ""
+
+        self._parse(docstring)
+
+    def _parse(self, docstring: str) -> None:
+        """Split ``docstring`` line by line at section headers."""
+        self.sections = []
+
+        description_lines: List[str] = []
+        section_lines: List[str] = []
+        cur_section = ""
+
+        for line in docstring.split("\n"):
+            if is_section(line):
+                # Entering new section
+                if cur_section:
+                    # Leaving previous section, save it and reset buffer
+                    self._add_section(cur_section, section_lines)
+                    section_lines = []
+
+                # Remember currently parsed section; surrounding whitespace
+                # must not end up in the heading or defeat name lookups
+                cur_section = line.strip().rstrip(":")
+                continue
+
+            if cur_section:
+                section_lines.append(line)
+            else:
+                # Before the first header we're parsing the description
+                description_lines.append(line)
+
+        # Last section; skipped when the text contained no header at all so
+        # that no empty, untitled section is emitted
+        if cur_section:
+            self._add_section(cur_section, section_lines)
+
+        self.description = "".join(line + "\n" for line in description_lines)
+
+    def _add_section(self, name: str, lines: List[str]) -> None:
+        """Append a ``Section`` built from the collected body ``lines``."""
+        body = "".join(line + "\n" for line in lines)
+        self.sections.append(Section(name, body))
+
+    def as_markdown(self) -> str:
+        """Return the description followed by every section as Markdown."""
+        pieces = [self.description]
+        pieces.extend(section.as_markdown() for section in self.sections)
+
+        return "".join(pieces).rstrip("\n") + "\n"  # Only keep one last newline
+
+
+# Matches a whole line made of a known section header, its colon and
+# optional trailing whitespace.  Headers must start in the first column.
+_SECTION_HEADER_RE = re.compile(
+    r"(?:{}):\s*".format("|".join(re.escape(name) for name in SECTION_HEADERS))
+)
+
+
+def is_section(line: str) -> bool:
+    """Return True if ``line`` is a section header such as ``Args:``.
+
+    The whole line has to be the header.  A line that merely contains
+    header-like text (``a: see Note: below``, ``MyArgs:``) is not a header.
+    """
+    return _SECTION_HEADER_RE.fullmatch(line) is not None
+
+
+def looks_like_google(value: str) -> bool:
+    """Return True if ``value`` contains a section header line.
+
+    As before, the header only counts when a newline follows it, so a
+    header on the very last, unterminated line is ignored.
+    """
+    # Every element except the last one was terminated by "\n" in ``value``
+    return any(is_section(line) for line in value.split("\n")[:-1])
+
+
+def google_to_markdown(text: str, extract_signature: bool = True) -> str:
+    """Convert a Google-style docstring to Markdown.
+
+    Args:
+        text: The docstring to convert.
+        extract_signature: Accepted but not used by this function.
+
+    Returns:
+        The Markdown rendering of ``text``.
+    """
+    escaped = text
+
+    # Neutralise sequences that Markdown would otherwise render
+    for regex, substitute in ESCAPE_RULES.items():
+        escaped = re.sub(regex, substitute, escaped)
+
+    return GoogleDocstring(escaped).as_markdown()
diff --git a/tests/test_google.py b/tests/test_google.py
@@ -0,0 +1,135 @@
+import pytest
+
+from docstring_to_markdown.google import google_to_markdown, looks_like_google
+
+# Each fixture below comes as a pair: the Google-style input and the
+# Markdown that google_to_markdown() is expected to produce for it.
+
+# Description followed by an Args list and an untyped Returns entry.
+BASIC_EXAMPLE = """Do **something**.
+
+Some more detailed description.
+
+Args:
+    a: some arg
+    b: some arg
+
+Returns:
+    Same *stuff*
+"""
+
+BASIC_EXAMPLE_MD = """Do **something**.
+
+Some more detailed description.
+
+#### Args
+
+- `a`: some arg
+- `b`: some arg
+
+#### Returns
+
+- Same *stuff*
+"""
+
+# Double underscores in a description must come out backslash-escaped.
+ESCAPE_MAGIC_METHOD = """Example.
+
+Args:
+    a: see __init__.py
+"""
+
+ESCAPE_MAGIC_METHOD_MD = """Example.
+
+#### Args
+
+- `a`: see \\_\\_init\\_\\_.py
+"""
+
+# Note and Example bodies are emitted as plain text, not as bullet lists.
+PLAIN_SECTION = """Example.
+
+Args:
+    a: some arg
+
+Note:
+    Do not use this.
+
+Example:
+    Do it like this.
+"""
+
+PLAIN_SECTION_MD = """Example.
+
+#### Args
+
+- `a`: some arg
+
+#### Note
+
+Do not use this.
+
+#### Example
+
+Do it like this.
+"""
+
+# Continuation lines are re-aligned under the start of the description,
+# including a description that only begins on the line after "name:".
+MULTILINE_ARG_DESCRIPTION = """Example.
+
+Args:
+    a (str): This is a long description
+             spanning over several lines
+        also with broken indentation
+    b (str): Second arg
+    c (str):
+        On the next line
+        And also multiple lines
+"""
+
+MULTILINE_ARG_DESCRIPTION_MD = """Example.
+
+#### Args
+
+- `a (str)`: This is a long description
+             spanning over several lines
+             also with broken indentation
+- `b (str)`: Second arg
+- `c (str)`: On the next line
+             And also multiple lines
+"""
+
+# Test id -> fixture pair; "google" is the input docstring and "md" the
+# expected Markdown.  The keys are used as pytest ids by the tests below.
+GOOGLE_CASES = {
+    "basic example": {
+        "google": BASIC_EXAMPLE,
+        "md": BASIC_EXAMPLE_MD,
+    },
+    "escape magic method": {
+        "google": ESCAPE_MAGIC_METHOD,
+        "md": ESCAPE_MAGIC_METHOD_MD,
+    },
+    "plain section": {
+        "google": PLAIN_SECTION,
+        "md": PLAIN_SECTION_MD,
+    },
+    "multiline arg description": {
+        "google": MULTILINE_ARG_DESCRIPTION,
+        "md": MULTILINE_ARG_DESCRIPTION_MD,
+    },
+}
+
+
+@pytest.mark.parametrize(
+    "google",
+    [fixture["google"] for fixture in GOOGLE_CASES.values()],
+    ids=list(GOOGLE_CASES),
+)
+def test_looks_like_google_recognises_google(google):
+    """Every Google-style fixture must be detected as Google style."""
+    detected = looks_like_google(google)
+    assert detected
+
+
+def test_looks_like_google_ignores_plain_text():
+    """Plain prose and a dash-underlined heading are not Google style."""
+    non_google_samples = (
+        "This is plain text",
+        "See Also\n--------\n",
+    )
+
+    for sample in non_google_samples:
+        assert not looks_like_google(sample)
+
+
+@pytest.mark.parametrize(
+    "google,markdown",
+    [(fixture["google"], fixture["md"]) for fixture in GOOGLE_CASES.values()],
+    ids=list(GOOGLE_CASES),
+)
+def test_google_to_markdown(google, markdown):
+    """Each fixture must convert to exactly its expected Markdown."""
+    converted = google_to_markdown(google)
+    assert converted == markdown