Skip to content

Fix RsT edge cases to better support polars #44

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits (source and target branch names were lost in the page capture)
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docstring_to_markdown/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
if TYPE_CHECKING:
from importlib_metadata import EntryPoint

__version__ = "0.16"
__version__ = "0.17"


class UnknownFormatError(Exception):
Expand Down
39 changes: 34 additions & 5 deletions docstring_to_markdown/rst.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC, abstractmethod
from enum import IntEnum, auto
from textwrap import dedent
from types import SimpleNamespace
from typing import Callable, Match, Union, List, Dict
import re
Expand Down Expand Up @@ -299,8 +300,8 @@ def inline_markdown(self):
SECTION_DIRECTIVES: Dict[str, List[Directive]] = {
'Parameters': [
Directive(
pattern=r'^(?P<other_args>\*\*kwargs|\*args)$',
replacement=r'- `\g<other_args>`'
pattern=r'^(?P<other_args>(\w[\w\d_\.]*)|\*\*kwargs|\*args)$',
replacement=r'- `\g<other_args>`:'
),
Directive(
pattern=r'^(?P<arg1>[^:\s]+\d), (?P<arg2>[^:\s]+\d), \.\.\. : (?P<type>.+)$',
Expand Down Expand Up @@ -336,6 +337,7 @@ def _find_directive_pattern(name: str):


def looks_like_rst(value: str) -> bool:
value = dedent(value)
# check if any of the characteristic sections (and the properly formatted underline) is there
for section in _RST_SECTIONS:
if (section + '\n' + '-' * len(section) + '\n') in value:
Expand Down Expand Up @@ -542,10 +544,20 @@ class BlockParser(IParser):
follower: Union['IParser', None] = None
_buffer: List[str]
_block_started: bool
_indent: Union[int, None]
should_measure_indent = True

def __init__(self):
self._buffer = []
self._block_started = False
self._indent = None

def measure_indent(self, line: str):
line_indent = len(line) - len(line.lstrip())
if self._indent is None:
self._indent = line_indent
else:
self._indent = min(line_indent, self._indent)

@abstractmethod
def can_parse(self, line: str) -> bool:
Expand All @@ -558,24 +570,33 @@ def _start_block(self, language: str):
def consume(self, line: str):
if not self._block_started:
raise ValueError('Block has not started') # pragma: no cover
if self.should_measure_indent:
self.measure_indent(line)
self._buffer.append(line)

def finish_consumption(self, final: bool) -> str:
# if the last line is empty (e.g. a separator of intended block), discard it
if self._buffer[len(self._buffer) - 1].strip() == '':
self._buffer.pop()
self._buffer.append(self.enclosure + '\n')
result = '\n'.join(self._buffer)
indent = " " * (self._indent or 0)
intermediate = '\n'.join(self._buffer)
result = '\n'.join([
(indent + line) if line else line
for line in intermediate.splitlines()
]) if indent else intermediate
if not final:
result += '\n'
self._buffer = []
self._block_started = False
self._indent = None
return result


class IndentedBlockParser(BlockParser, ABC):
_is_block_beginning: bool
_block_indent_size: Union[int, None]
should_measure_indent = False

def __init__(self):
super(IndentedBlockParser, self).__init__()
Expand All @@ -599,6 +620,7 @@ def consume(self, line: str):
return
if self._block_indent_size is None:
self._block_indent_size = len(line) - len(line.lstrip())
self.measure_indent(line)
super().consume(line[self._block_indent_size:])

def finish_consumption(self, final: bool) -> str:
Expand Down Expand Up @@ -684,6 +706,7 @@ def can_parse(self, line: str):
return line.strip() in self.directives

def initiate_parsing(self, line: str, current_language: str):
self.measure_indent(line)
admonition = self.directives[line.strip()]
self._start_block(f'\n{admonition.block_markdown}\n')
return IBlockBeginning(remainder='')
Expand All @@ -694,6 +717,7 @@ def can_parse(self, line: str) -> bool:
return re.match(CODE_BLOCK_PATTERN, line) is not None

def initiate_parsing(self, line: str, current_language: str) -> IBlockBeginning:
self.measure_indent(line)
match = re.match(CODE_BLOCK_PATTERN, line)
# already checked in can_parse
assert match
Expand Down Expand Up @@ -753,6 +777,8 @@ def rst_to_markdown(text: str, extract_signature: bool = True) -> str:
most_recent_section: Union[str, None] = None
is_first_line = True

text = dedent(text)

def flush_buffer():
nonlocal lines_buffer
lines = '\n'.join(lines_buffer)
Expand All @@ -766,7 +792,8 @@ def flush_buffer():
lines_buffer = []
return lines

for line in text.split('\n'):
lines = text.split('\n')
for i, line in enumerate(lines):
if is_first_line:
if extract_signature:
signature_match = re.match(r'^(?P<name>\S+)\((?P<params>.*)\)$', line)
Expand Down Expand Up @@ -809,7 +836,9 @@ def flush_buffer():
else:
if most_recent_section in SECTION_DIRECTIVES:
for section_directive in SECTION_DIRECTIVES[most_recent_section]:
if re.match(section_directive.pattern, trimmed_line):
next_line = lines[i + 1] if i + 1 < len(lines) else ""
is_next_line_section = set(next_line.strip()) == {"-"}
if re.match(section_directive.pattern, line) and not is_next_line_section:
line = re.sub(section_directive.pattern, section_directive.replacement, trimmed_line)
break
if trimmed_line.rstrip() in RST_SECTIONS:
Expand Down
127 changes: 126 additions & 1 deletion tests/test_rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def func(): pass

- `x`: array_like
Input array.
- `**kwargs`
- `**kwargs`:
For other keyword-only arguments, see the ufunc docs.
"""

Expand Down Expand Up @@ -638,6 +638,119 @@ def func(): pass
"""


# this format is often used by polars
PARAMETERS_WITHOUT_TYPE = """
Parameters
----------
source
Path(s) to a file or directory
When needing to authenticate for scanning cloud locations, see the
`storage_options` parameter.
columns
Columns to select. Accepts a list of column indices (starting at zero) or a list
of column names.
n_rows
Stop reading from parquet file after reading `n_rows`.
Only valid when `use_pyarrow=False`.

Returns
-------
DataFrame
"""

PARAMETERS_WITHOUT_TYPE_MARKDOWN = """
#### Parameters

- `source`:
Path(s) to a file or directory
When needing to authenticate for scanning cloud locations, see the
`storage_options` parameter.
- `columns`:
Columns to select. Accepts a list of column indices (starting at zero) or a list
of column names.
- `n_rows`:
Stop reading from parquet file after reading `n_rows`.
Only valid when `use_pyarrow=False`.

#### Returns

DataFrame
"""

INDENTED_DOCSTRING = """
Parameters
----------
glob
Expand path given via globbing rules.
"""

INDENTED_DOCSTRING_MARKDOWN = """
#### Parameters

- `glob`:
Expand path given via globbing rules.
"""


WARNINGS_IN_PARAMETERS = """
Parameters
----------
glob
Expand path given via globbing rules.
schema
Specify the datatypes of the columns. The datatypes must match the
datatypes in the file(s). If there are extra columns that are not in the
file(s), consider also enabling `allow_missing_columns`.

.. warning::
This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.
hive_schema
The column names and data types of the columns by which the data is partitioned.
If set to `None` (default), the schema of the Hive partitions is inferred.

.. warning::
This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.
try_parse_hive_dates
Whether to try parsing hive values as date/datetime types.
"""


WARNINGS_IN_PARAMETERS_MARKDOWN = """
#### Parameters

- `glob`:
Expand path given via globbing rules.
- `schema`:
Specify the datatypes of the columns. The datatypes must match the
datatypes in the file(s). If there are extra columns that are not in the
file(s), consider also enabling `allow_missing_columns`.


---
⚠️ **Warning**

This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.

---
- `hive_schema`:
The column names and data types of the columns by which the data is partitioned.
If set to `None` (default), the schema of the Hive partitions is inferred.


---
⚠️ **Warning**

This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.

---
- `try_parse_hive_dates`:
Whether to try parsing hive values as date/datetime types.
"""

NESTED_PARAMETERS = """
Parameters
----------
Expand Down Expand Up @@ -887,6 +1000,18 @@ def foo():
'rst': NESTED_PARAMETERS,
'md': NESTED_PARAMETERS_MARKDOWN
},
'converts parameter without type': {
'rst': PARAMETERS_WITHOUT_TYPE,
'md': PARAMETERS_WITHOUT_TYPE_MARKDOWN
},
'converts indented parameters lists': {
'rst': INDENTED_DOCSTRING,
'md': INDENTED_DOCSTRING_MARKDOWN
},
'converts warnings in parameters lists': {
'rst': WARNINGS_IN_PARAMETERS,
'md': WARNINGS_IN_PARAMETERS_MARKDOWN
},
'converts sphinx signatures': {
'rst': SPHINX_SIGNATURE,
'md': SPHINX_SIGNATURE_MARKDOWN
Expand Down
Loading