cyberagiinc · Shubham-Khichi · Jan 31, 2025 · Jan 17, 2025 · Jan 18, 2025 · Jan 18, 2025
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
diff --git a/fast-markdown-mcp/src/fast_markdown_mcp/document_structure.py b/fast-markdown-mcp/src/fast_markdown_mcp/document_structure.py
@@ -0,0 +1,105 @@
+import re
+from dataclasses import dataclass
+from typing import List, Dict, Optional, Tuple
+
+@dataclass
+class Section:
+    """One heading-delimited section of a markdown document.
+
+    Instances are normally created by ``DocumentStructure.parse_document``.
+    """
+    # Heading depth: 1-6 for ``#`` .. ``######``; 0 for the synthetic
+    # whole-document section used when a document has no headings.
+    level: int
+    # Heading text with surrounding whitespace removed.
+    title: str
+    # Text between this heading and the next heading of any level, stripped.
+    # Text that belongs to subsections is NOT included.
+    content: str
+    # Offset in the source text where the heading line starts.
+    start_pos: int
+    # Offset in the source text where this section's own content ends.
+    end_pos: int
+    # Directly nested sections (deeper headings that follow this one).
+    subsections: List['Section']
+    # Unique lookup key assigned while parsing; empty when a Section is
+    # built by hand rather than by DocumentStructure.
+    section_id: str = ""
+
+class DocumentStructure:
+    """Parse a markdown document into a section tree with ID-based lookup.
+
+    ``sections`` holds the top-level sections in document order and ``toc``
+    maps every section's unique ID to its ``Section``.
+    """
+
+    # ATX heading starting at column 0. The separator is limited to spaces
+    # and tabs: ``\s`` also matches newlines, which let a bare ``#`` line
+    # swallow the following line as its title.
+    _HEADER_RE = re.compile(r'^(#{1,6})[ \t]+(.+)$', re.MULTILINE)
+    # Opening or closing line of a fenced code block (``` or ~~~).
+    _FENCE_RE = re.compile(r'^ {0,3}(`{3,}|~{3,})(.*)$', re.MULTILINE)
+
+    def __init__(self):
+        self.sections: List[Section] = []
+        self.toc: Dict[str, Section] = {}
+
+    def parse_document(self, content: str) -> None:
+        """Parse markdown content into sections, replacing any previous parse.
+
+        Args:
+            content: Full markdown text of the document.
+
+        Headings inside fenced code blocks are ignored. A document without
+        headings becomes a single level-0 section titled "Document". Text
+        that precedes the first heading is not attached to any section.
+        """
+        self.sections = []
+        self.toc = {}
+
+        headers = self._find_headers(content)
+
+        if not headers:
+            # If no headers, treat entire document as one section
+            whole = Section(
+                level=0,
+                title="Document",
+                content=content,
+                start_pos=0,
+                end_pos=len(content),
+                subsections=[]
+            )
+            # Register it so the ID listed in the table of contents resolves.
+            whole.section_id = self._register(whole)
+            self.sections = [whole]
+            return
+
+        # Stack of the currently open ancestors, outermost first.
+        open_sections: List[Section] = []
+        for index, (hashes, title, start, header_end) in enumerate(headers):
+            level = len(hashes)
+
+            # Own content runs from the end of this header to the start of
+            # the next one, or to the end of the document.
+            if index + 1 < len(headers):
+                content_end = headers[index + 1][2]
+            else:
+                content_end = len(content)
+
+            section = Section(
+                level=level,
+                title=title.strip(),
+                content=content[header_end:content_end].strip(),
+                start_pos=start,
+                end_pos=content_end,
+                subsections=[]
+            )
+            section.section_id = self._register(section)
+
+            # Close every open section that is not shallower than this one;
+            # whatever remains on top of the stack is the parent.
+            while open_sections and open_sections[-1].level >= level:
+                open_sections.pop()
+
+            if open_sections:
+                open_sections[-1].subsections.append(section)
+            else:
+                self.sections.append(section)
+
+            open_sections.append(section)
+
+    def get_section_by_id(self, section_id: str) -> Optional[Section]:
+        """Return the section registered under ``section_id``, or None."""
+        return self.toc.get(section_id)
+
+    def get_table_of_contents(self) -> List[Tuple[int, str, str]]:
+        """Get table of contents as [(level, title, section_id)].
+
+        Entries are in document order. Each title is prefixed with two
+        spaces per nesting depth in the section tree.
+        """
+        toc_entries = []
+
+        def add_section(section: Section, prefix: str = ""):
+            # Prefer the unique ID assigned while parsing; fall back to the
+            # title slug for sections that were constructed by hand.
+            section_id = section.section_id or self._make_section_id(section.title)
+            toc_entries.append((section.level, prefix + section.title, section_id))
+            for subsection in section.subsections:
+                add_section(subsection, prefix + "  ")
+
+        for section in self.sections:
+            add_section(section)
+
+        return toc_entries
+
+    def _find_headers(self, content: str) -> List[Tuple[str, str, int, int]]:
+        """Return (hashes, title, start, end) for each heading outside code fences."""
+        fences = self._fenced_spans(content)
+        headers = []
+        fence_index = 0
+        for match in self._HEADER_RE.finditer(content):
+            start = match.start()
+            # Both sequences are ordered by offset, so fences that end before
+            # this heading can be dropped for good.
+            while fence_index < len(fences) and fences[fence_index][1] <= start:
+                fence_index += 1
+            if fence_index < len(fences) and fences[fence_index][0] <= start:
+                continue  # a "# ..." line inside a code block is not a heading
+            headers.append((match.group(1), match.group(2), start, match.end()))
+        return headers
+
+    def _fenced_spans(self, content: str) -> List[Tuple[int, int]]:
+        """Return (start, end) offsets of fenced code blocks, in document order."""
+        spans = []
+        opener = None  # (fence char, fence length, start offset) while inside a block
+        for match in self._FENCE_RE.finditer(content):
+            marker, rest = match.group(1), match.group(2)
+            if opener is None:
+                opener = (marker[0], len(marker), match.start())
+            elif (marker[0] == opener[0] and len(marker) >= opener[1]
+                    and not rest.strip()):
+                # A closing fence uses the same character, is at least as
+                # long as the opener and carries no info string.
+                spans.append((opener[2], match.end()))
+                opener = None
+        if opener is not None:
+            # An unclosed fence runs to the end of the document.
+            spans.append((opener[2], len(content)))
+        return spans
+
+    def _register(self, section: Section) -> str:
+        """Store ``section`` in the TOC under a unique ID and return that ID."""
+        # Titles without any ASCII letters or digits slugify to "", which
+        # callers cannot pass as an ID, so give them a generic base instead.
+        base = self._make_section_id(section.title) or "section"
+        candidate = base
+        suffix = 0
+        # The first occurrence keeps the plain slug; repeats get -1, -2, ...
+        # so that no section is overwritten and becomes unreachable.
+        while candidate in self.toc:
+            suffix += 1
+            candidate = f"{base}-{suffix}"
+        self.toc[candidate] = section
+        return candidate
+
+    def _make_section_id(self, title: str) -> str:
+        """Generate a URL-friendly section ID from title."""
+        # Convert to lowercase and replace spaces with hyphens
+        section_id = title.lower().replace(" ", "-")
+        # Remove any non-alphanumeric characters (except hyphens)
+        section_id = re.sub(r'[^a-z0-9-]', '', section_id)
+        # Remove multiple consecutive hyphens
+        section_id = re.sub(r'-+', '-', section_id)
+        return section_id.strip('-')
diff --git a/fast-markdown-mcp/src/fast_markdown_mcp/server.py b/fast-markdown-mcp/src/fast_markdown_mcp/server.py
@@ -13,22 +13,67 @@
 
 logger = logging.getLogger(__name__)
 
+from .document_structure import DocumentStructure
+
 class MarkdownStore:
     """Manages markdown content and metadata."""
 
     def __init__(self, storage_path: str):
+        # Directory that get_content resolves "<file_id>.md" against.
         self.base_path = Path(storage_path)
         self.content_cache = {}
         self.metadata_cache = {}
+        self.structure_cache = {}  # file_id -> parsed DocumentStructure, filled by get_content
 
     async def get_content(self, file_id: str) -> str:
-        """Get markdown content."""
+        """Return the text of ``<file_id>.md`` and refresh its cached structure.
+
+        Args:
+            file_id: File name without the ``.md`` extension, resolved
+                against ``self.base_path``.
+
+        Returns:
+            The file's text, or an ``"Error reading content: ..."`` string
+            when reading or parsing fails. No exception is raised.
+        """
         file_path = self.base_path / f"{file_id}.md"
         try:
-            return file_path.read_text(encoding='utf-8')
+            content = file_path.read_text(encoding='utf-8')
+            # Re-parse on every read. The text was just loaded from disk, so
+            # keeping an older parse would let section and TOC lookups drift
+            # away from the content returned here.
+            structure = DocumentStructure()
+            structure.parse_document(content)
+            self.structure_cache[file_id] = structure
+            return content
         except Exception as e:
             logger.error(f"Error reading content for {file_id}: {e}")
+            # Do not keep serving the structure of a file that can no longer be read.
+            self.structure_cache.pop(file_id, None)
             return f"Error reading content: {str(e)}"
+
+    async def get_section(self, file_id: str, section_id: str) -> str:
+        """Return one section of a markdown file as display text.
+
+        Args:
+            file_id: File name without the ``.md`` extension.
+            section_id: ID of the section, as listed in the table of contents.
+
+        Returns:
+            ``"Section: <title>"`` followed by the section content, or a
+            human-readable message when the document cannot be loaded or
+            the section does not exist. No exception is raised.
+        """
+        try:
+            if file_id not in self.structure_cache:
+                await self.get_content(file_id)  # This will parse and cache the structure
+
+            structure = self.structure_cache.get(file_id)
+            if structure is None:
+                # get_content reports a failed read by returning an error
+                # string instead of raising, which leaves the cache empty.
+                # Indexing the cache here would surface as a bare KeyError.
+                return f"Error getting section: could not load document '{file_id}'"
+
+            section = structure.get_section_by_id(section_id)
+            if section is None:
+                return f"Section '{section_id}' not found in {file_id}"
+
+            return f"Section: {section.title}\n\n{section.content}"
+        except Exception as e:
+            logger.error(f"Error getting section {section_id} from {file_id}: {e}")
+            return f"Error getting section: {str(e)}"
+
+    async def get_table_of_contents(self, file_id: str) -> str:
+        """Return the table of contents of a markdown file as display text.
+
+        Args:
+            file_id: File name without the ``.md`` extension.
+
+        Returns:
+            A header line followed by one ``- <title> [<section_id>]`` line
+            per section, indented two spaces per heading level, or a
+            human-readable message when the document cannot be loaded.
+            No exception is raised.
+        """
+        try:
+            if file_id not in self.structure_cache:
+                await self.get_content(file_id)  # This will parse and cache the structure
+
+            structure = self.structure_cache.get(file_id)
+            if structure is None:
+                # get_content reports a failed read by returning an error
+                # string instead of raising, which leaves the cache empty.
+                return f"Error getting table of contents: could not load document '{file_id}'"
+
+            result = [f"Table of Contents for {file_id}:"]
+            for level, title, section_id in structure.get_table_of_contents():
+                indent = "  " * level
+                # Titles arrive already padded per nesting depth; drop that
+                # padding so the indentation is applied once, before the bullet.
+                result.append(f"{indent}- {title.lstrip()} [{section_id}]")
+
+            return "\n".join(result)
+        except Exception as e:
+            logger.error(f"Error getting table of contents for {file_id}: {e}")
+            return f"Error getting table of contents: {str(e)}"
 
     async def get_metadata(self, file_id: str) -> dict:
         """Get metadata as a dictionary."""
@@ -335,6 +380,38 @@ async def list_tools() -> list[types.Tool]:
                         "type": "object",
                         "properties": {}
                     }
+                ),
+                types.Tool(
+                    name="get_section",
+                    description="Get a specific section from a markdown file",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "file_id": {
+                                "type": "string",
+                                "description": "ID of the file (without .md extension)"
+                            },
+                            "section_id": {
+                                "type": "string",
+                                "description": "ID of the section to retrieve"
+                            }
+                        },
+                        "required": ["file_id", "section_id"]
+                    }
+                ),
+                types.Tool(
+                    name="get_table_of_contents",
+                    description="Get table of contents for a markdown file",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "file_id": {
+                                "type": "string",
+                                "description": "ID of the file (without .md extension)"
+                            }
+                        },
+                        "required": ["file_id"]
+                    }
                 )
             ]
 
@@ -373,6 +450,19 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
             elif name == "get_stats":
                 result = await self.store.get_stats()
                 return [types.TextContent(type="text", text=result)]
+            elif name == "get_section":
+                file_id = arguments.get("file_id")
+                section_id = arguments.get("section_id")
+                if not file_id or not section_id:
+                    raise ValueError("file_id and section_id are required")
+                result = await self.store.get_section(file_id, section_id)
+                return [types.TextContent(type="text", text=result)]
+            elif name == "get_table_of_contents":
+                file_id = arguments.get("file_id")
+                if not file_id:
+                    raise ValueError("file_id is required")
+                result = await self.store.get_table_of_contents(file_id)
+                return [types.TextContent(type="text", text=result)]
             else:
                 raise ValueError(f"Unknown tool: {name}")
 

diff --git a/storage/markdown/ai_pydantic_dev_.json b/storage/markdown/ai_pydantic_dev_.json
diff --git a/storage/markdown/docs_ag2_ai_docs_home.json b/storage/markdown/docs_ag2_ai_docs_home.json