Skip to content
Merged
30 changes: 0 additions & 30 deletions .github/ISSUE_TEMPLATE/bug_report.md

This file was deleted.

105 changes: 105 additions & 0 deletions fast-markdown-mcp/src/fast_markdown_mcp/document_structure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import re
from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple

@dataclass
class Section:
    """One heading-delimited section of a markdown document.

    Attributes:
        level: Heading depth, i.e. the number of ``#`` characters (1-6).
            The synthetic whole-document section created for heading-less
            documents uses level 0.
        title: Heading text with surrounding whitespace removed.
        content: Stripped text between this heading and the next heading of
            any level; text of nested subsections is not included.
        start_pos: Offset of the heading's first character in the document.
        end_pos: Offset where this section's own text ends (start of the
            next heading, or end of the document).
        subsections: Directly nested sections, in document order.
    """
    level: int
    title: str
    content: str
    start_pos: int
    end_pos: int
    subsections: List['Section']


class DocumentStructure:
    """Parses a markdown document into a section tree with ID-based lookup.

    After ``parse_document``:
        * ``sections`` holds the top-level sections (each with nested
          ``subsections``).
        * ``toc`` maps a unique, URL-friendly section ID to its ``Section``.

    Text that appears before the first heading is not part of any section.
    """

    # ATX heading: 1-6 '#', same-line whitespace, then a title that starts
    # with a non-space character.  Whitespace is matched with [^\S\n] rather
    # than \s so an empty heading ("# ") cannot swallow the next line.
    _HEADER_PATTERN = re.compile(r'^(#{1,6})[^\S\n]+(\S.*)$', re.MULTILINE)
    # Fence line of a fenced code block: up to 3 spaces of indent, then at
    # least three backticks or tildes, then the rest of the line.
    _FENCE_PATTERN = re.compile(r'^ {0,3}(`{3,}|~{3,})(.*)$', re.MULTILINE)
    # ID used when a title contains no ASCII letters or digits at all.
    _FALLBACK_ID = "section"

    def __init__(self):
        self.sections: List[Section] = []
        self.toc: Dict[str, Section] = {}

    def parse_document(self, content: str) -> None:
        """Parse markdown ``content`` and rebuild ``sections`` and ``toc``.

        Any previously parsed state is discarded.  Lines starting with ``#``
        inside fenced code blocks are treated as code, not headings.  A
        document without headings becomes a single level-0 section titled
        "Document", reachable under the ID ``"document"``.
        """
        self.sections = []
        self.toc = {}

        headers = self._find_headers(content)
        if not headers:
            whole_document = Section(
                level=0,
                title="Document",
                content=content,
                start_pos=0,
                end_pos=len(content),
                subsections=[],
            )
            self.sections = [whole_document]
            self._register(whole_document)
            return

        # Stack of ancestors of the section currently being placed.
        open_sections: List[Section] = []
        for index, (hashes, title, start, header_end) in enumerate(headers):
            level = len(hashes)

            # Own text runs from the end of this heading line to the start
            # of the next heading (of any level) or the end of the document.
            if index + 1 < len(headers):
                content_end = headers[index + 1][2]
            else:
                content_end = len(content)

            section = Section(
                level=level,
                title=title.strip(),
                content=content[header_end:content_end].strip(),
                start_pos=start,
                end_pos=content_end,
                subsections=[],
            )
            self._register(section)

            # Drop siblings and deeper sections; whatever remains on top of
            # the stack is the nearest shallower heading, i.e. the parent.
            while open_sections and open_sections[-1].level >= level:
                open_sections.pop()

            if open_sections:
                open_sections[-1].subsections.append(section)
            else:
                self.sections.append(section)

            open_sections.append(section)

    def get_section_by_id(self, section_id: str) -> Optional[Section]:
        """Return the section registered under ``section_id``, or None."""
        return self.toc.get(section_id)

    def get_table_of_contents(self) -> List[Tuple[int, str, str]]:
        """Return the table of contents as ``[(level, title, section_id)]``.

        Entries are in document order.  Each title is prefixed with one
        space per nesting depth, and each ``section_id`` is the key under
        which that section can be fetched with ``get_section_by_id``.
        """
        # Reverse lookup so duplicate titles report their own unique IDs.
        registered_ids = {
            id(section): section_id
            for section_id, section in self.toc.items()
        }
        toc_entries: List[Tuple[int, str, str]] = []

        def add_section(section: Section, prefix: str = "") -> None:
            section_id = registered_ids.get(id(section))
            if section_id is None:
                # Section was not registered by parse_document (e.g. added
                # by hand); fall back to the title-derived ID.
                section_id = self._make_section_id(section.title)
            toc_entries.append(
                (section.level, prefix + section.title, section_id)
            )
            for subsection in section.subsections:
                add_section(subsection, prefix + " ")

        for section in self.sections:
            add_section(section)

        return toc_entries

    def _find_headers(self, content: str) -> List[Tuple[str, str, int, int]]:
        """Return ``(hashes, title, start, end)`` for each real heading.

        Heading-like lines inside fenced code blocks are skipped.
        """
        fenced = self._fenced_ranges(content)
        headers: List[Tuple[str, str, int, int]] = []
        fence_index = 0
        for match in self._HEADER_PATTERN.finditer(content):
            start = match.start()
            # Both sequences are in document order, so advance a single
            # cursor past fences that end before this candidate heading.
            while fence_index < len(fenced) and fenced[fence_index][1] <= start:
                fence_index += 1
            if fence_index < len(fenced) and fenced[fence_index][0] <= start:
                continue  # inside a code fence: this is code, not a heading
            headers.append(
                (match.group(1), match.group(2), start, match.end())
            )
        return headers

    def _fenced_ranges(self, content: str) -> List[Tuple[int, int]]:
        """Return half-open ``(start, end)`` offsets of fenced code blocks."""
        ranges: List[Tuple[int, int]] = []
        open_marker: Optional[str] = None
        open_at = 0
        for match in self._FENCE_PATTERN.finditer(content):
            marker, rest = match.group(1), match.group(2)
            if open_marker is None:
                open_marker, open_at = marker, match.start()
            elif (
                marker[0] == open_marker[0]
                and len(marker) >= len(open_marker)
                and not rest.strip()
            ):
                # A closing fence uses the same character, is at least as
                # long as the opener, and carries no info string.
                ranges.append((open_at, match.end()))
                open_marker = None
        if open_marker is not None:
            # An unclosed fence runs to the end of the document.
            ranges.append((open_at, len(content)))
        return ranges

    def _register(self, section: Section) -> str:
        """Store ``section`` in ``toc`` under a unique ID and return the ID.

        The first section with a given title keeps the plain ID; later
        duplicates get ``-1``, ``-2``, ... appended so none is overwritten.
        """
        base_id = self._make_section_id(section.title) or self._FALLBACK_ID
        section_id = base_id
        suffix = 0
        while section_id in self.toc:
            suffix += 1
            section_id = f"{base_id}-{suffix}"
        self.toc[section_id] = section
        return section_id

    def _make_section_id(self, title: str) -> str:
        """Generate a URL-friendly section ID from ``title``.

        Lowercases the title, turns spaces into hyphens, drops every
        character other than ``a-z``, ``0-9`` and ``-``, collapses runs of
        hyphens, and trims hyphens from both ends.  The result may be empty
        if the title has no ASCII letters or digits.
        """
        section_id = title.lower().replace(" ", "-")
        section_id = re.sub(r'[^a-z0-9-]', '', section_id)
        section_id = re.sub(r'-+', '-', section_id)
        return section_id.strip('-')
92 changes: 91 additions & 1 deletion fast-markdown-mcp/src/fast_markdown_mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,67 @@

logger = logging.getLogger(__name__)

from .document_structure import DocumentStructure

class MarkdownStore:
"""Manages markdown content and metadata."""

def __init__(self, storage_path: str):
self.base_path = Path(storage_path)
self.content_cache = {}
self.metadata_cache = {}
self.structure_cache = {} # Cache for parsed document structures

async def get_content(self, file_id: str) -> str:
    """Read a markdown file and ensure its parsed structure is cached.

    Args:
        file_id: File name without the ``.md`` extension, resolved
            against ``self.base_path``.

    Returns:
        The file's text.  If reading or parsing fails, the error is logged
        and an ``"Error reading content: ..."`` message is returned instead
        of raising.
    """
    file_path = self.base_path / f"{file_id}.md"
    try:
        # The text must be bound to a name (not returned straight away) so
        # the structure-caching step below is actually reached.
        content = file_path.read_text(encoding='utf-8')
        # Parse only the first time a file id is seen; later calls reuse
        # the cached structure.
        if file_id not in self.structure_cache:
            structure = DocumentStructure()
            structure.parse_document(content)
            self.structure_cache[file_id] = structure
        return content
    except Exception as e:
        logger.error(f"Error reading content for {file_id}: {e}")
        return f"Error reading content: {str(e)}"

async def get_section(self, file_id: str, section_id: str) -> str:
    """Return one section of a markdown file as display text.

    Args:
        file_id: File name without the ``.md`` extension.
        section_id: ID of the section to look up in the file's parsed
            structure.

    Returns:
        ``"Section: <title>"``, a blank line, then the section content; or
        a human-readable message when the file could not be loaded, the
        section does not exist, or an unexpected error occurred.  Errors
        are reported in the returned text, never raised.
    """
    try:
        if file_id not in self.structure_cache:
            # Loading the content parses and caches the structure.
            await self.get_content(file_id)

        structure = self.structure_cache.get(file_id)
        if structure is None:
            # The load did not produce a structure.  Report that explicitly
            # instead of letting a KeyError surface as the message text.
            return f"Error getting section: could not load {file_id}"

        section = structure.get_section_by_id(section_id)
        if section is None:
            return f"Section '{section_id}' not found in {file_id}"

        return f"Section: {section.title}\n\n{section.content}"
    except Exception as e:
        logger.error(f"Error getting section {section_id} from {file_id}: {e}")
        return f"Error getting section: {str(e)}"

async def get_table_of_contents(self, file_id: str) -> str:
    """Return a markdown file's table of contents as display text.

    Args:
        file_id: File name without the ``.md`` extension.

    Returns:
        A header line followed by one ``- <title> [<section_id>]`` line per
        section, each indented by one space per heading level; or a
        human-readable message when the file could not be loaded or an
        unexpected error occurred.  Errors are reported in the returned
        text, never raised.
    """
    try:
        if file_id not in self.structure_cache:
            # Loading the content parses and caches the structure.
            await self.get_content(file_id)

        structure = self.structure_cache.get(file_id)
        if structure is None:
            # The load did not produce a structure.  Report that explicitly
            # instead of letting a KeyError surface as the message text.
            return f"Error getting table of contents: could not load {file_id}"

        lines = [f"Table of Contents for {file_id}:"]
        lines.extend(
            f"{' ' * level}- {title} [{section_id}]"
            for level, title, section_id in structure.get_table_of_contents()
        )
        return "\n".join(lines)
    except Exception as e:
        logger.error(f"Error getting table of contents for {file_id}: {e}")
        return f"Error getting table of contents: {str(e)}"

async def get_metadata(self, file_id: str) -> dict:
"""Get metadata as a dictionary."""
Expand Down Expand Up @@ -335,6 +380,38 @@ async def list_tools() -> list[types.Tool]:
"type": "object",
"properties": {}
}
),
types.Tool(
name="get_section",
description="Get a specific section from a markdown file",
inputSchema={
"type": "object",
"properties": {
"file_id": {
"type": "string",
"description": "ID of the file (without .md extension)"
},
"section_id": {
"type": "string",
"description": "ID of the section to retrieve"
}
},
"required": ["file_id", "section_id"]
}
),
types.Tool(
name="get_table_of_contents",
description="Get table of contents for a markdown file",
inputSchema={
"type": "object",
"properties": {
"file_id": {
"type": "string",
"description": "ID of the file (without .md extension)"
}
},
"required": ["file_id"]
}
)
]

Expand Down Expand Up @@ -373,6 +450,19 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
elif name == "get_stats":
result = await self.store.get_stats()
return [types.TextContent(type="text", text=result)]
elif name == "get_section":
file_id = arguments.get("file_id")
section_id = arguments.get("section_id")
if not file_id or not section_id:
raise ValueError("file_id and section_id are required")
result = await self.store.get_section(file_id, section_id)
return [types.TextContent(type="text", text=result)]
elif name == "get_table_of_contents":
file_id = arguments.get("file_id")
if not file_id:
raise ValueError("file_id is required")
result = await self.store.get_table_of_contents(file_id)
return [types.TextContent(type="text", text=result)]
else:
raise ValueError(f"Unknown tool: {name}")

Expand Down
4 changes: 4 additions & 0 deletions storage/markdown/ai_pydantic_dev_.json

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions storage/markdown/docs_ag2_ai_docs_home.json

Large diffs are not rendered by default.

Loading