Skip to content

Commit

Permalink
Merge pull request langchain-ai#1 from langchain-ai/wfh/mods
Browse files Browse the repository at this point in the history
Add CLI and syntax checker
  • Loading branch information
hinthornw authored Feb 28, 2024
2 parents 6f20ba6 + afd9710 commit a0ad4a9
Show file tree
Hide file tree
Showing 12 changed files with 2,241 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
__pycache__
.ipynb_checkpoints
Untitled*.ipynb

22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Langgraph-Engineer


A (very alpha) CLI and corresponding notebook for langgraph app generation.

To use, install:

```bash
pip install -U langgraph-engineer
```

You can generate an app from a description alone, or you can additionally pass in a diagram image.

```bash
langgraph-engineer create --description "A RAG app over my local PDF" --diagram "path/to/diagram.png"
```

For example:

```bash
langgraph-engineer create --description "A corrective RAG app" --diagram "CRAG.jpg"
```
14 changes: 12 additions & 2 deletions langgraph-engineer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,17 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "5fa764c3",
"metadata": {},
"outputs": [],
"source": [
"# %pip install -U langgraph bs4 langchain_community"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c68bdd2d-7a2e-4b2e-b153-fa85c036ae24",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -410,7 +420,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.11.2"
}
},
"nbformat": 4,
Expand Down
Empty file added langgraph_engineer/__init__.py
Empty file.
24 changes: 24 additions & 0 deletions langgraph_engineer/code_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from tempfile import NamedTemporaryFile
from typing_extensions import TypedDict
from ruff.__main__ import find_ruff_bin
import subprocess


# Result of one ruff invocation: the captured stdout and stderr text.
LintOutput = TypedDict("LintOutput", {"out": str, "error": str})

def run_ruff(code: str) -> LintOutput:
    """Lint ``code`` with the bundled ruff binary.

    The snippet is written to a temporary ``.py`` file, ruff is invoked on
    it as a subprocess, and the temp file's path is rewritten to the stable
    name ``code.py`` in both output streams so messages are readable.

    NOTE(review): on Windows a ``NamedTemporaryFile`` cannot be reopened by
    another process while still open here — confirm POSIX-only usage.
    """
    with NamedTemporaryFile(mode="w", suffix=".py") as tmp:
        tmp.write(code)
        # Seeking forces a flush, so the subprocess sees the full contents.
        tmp.seek(0)
        completed = subprocess.run(
            [find_ruff_bin(), tmp.name], capture_output=True
        )
        out_text = completed.stdout.decode().replace(tmp.name, "code.py")
        err_text = completed.stderr.decode().replace(tmp.name, "code.py")
    return {"out": out_text, "error": err_text}
4 changes: 4 additions & 0 deletions langgraph_engineer/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from pathlib import Path

# Directory holding the pre-scraped LangGraph documentation shipped with the package.
DOCS_DIR = Path(__file__).parent / "data"
# Serialized document list written by `ingest.ingest` and read by `ingest.load_docs`.
DOCS_PATH = DOCS_DIR / "docs.json"
1 change: 1 addition & 0 deletions langgraph_engineer/docs.json

Large diffs are not rendered by default.

78 changes: 78 additions & 0 deletions langgraph_engineer/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import functools
import logging

from bs4 import BeautifulSoup
from langchain_community.document_loaders.recursive_url_loader import \
RecursiveUrlLoader
from langchain_core.load import dumps, loads
from langgraph_engineer.constants import DOCS_PATH
import warnings

# Module-level logger, named after the module per standard logging convention.
logger = logging.getLogger(__name__)


def html_to_markdown(tag):
    """Render a single HTML tag as a Markdown fragment.

    Headings become ``#``-prefixed lines, ``<pre>`` blocks become fenced
    code blocks, and paragraphs become plain text. Any other tag renders
    as the empty string; non-empty fragments end with a blank line.
    """
    name = tag.name
    if name in {"h1", "h2", "h3", "h4", "h5", "h6"}:
        # The heading level is encoded in the tag name ("h3" -> 3).
        level = int(name[1])
        return f"{'#' * level} {tag.get_text()}\n\n"
    if name == "pre":
        # Prefer the inner <code> element's text when one is present.
        inner = tag.find("code")
        body = inner.get_text() if inner else tag.get_text()
        return f"```\n{body}\n```\n\n"
    if name == "p":
        return f"{tag.get_text()}\n\n"
    return ""


def clean_document(html_content):
    """Convert an HTML document into a Markdown string.

    Walks every element of the parsed tree depth-first and concatenates
    the Markdown fragment each tag renders to (see ``html_to_markdown``).
    Plain text nodes have no ``name`` and are skipped.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    # `.descendants` is the supported spelling of the deprecated
    # `recursiveChildGenerator` and yields the same depth-first traversal;
    # joining once avoids quadratic string concatenation in the loop.
    return "".join(
        html_to_markdown(child) for child in soup.descendants if child.name
    )


def ingest(dry_run: bool = False):
    """Scrape the LangGraph docs site and persist the pages to DOCS_PATH.

    Each page is converted to Markdown via ``clean_document``; the pages
    are then ordered by source URL (descending) and serialized with
    langchain's ``dumps``. With ``dry_run=True`` the formatted documents
    are printed instead of written.
    """
    logger.info("Ingesting documents...")
    # Crawl the LangGraph docs recursively from the landing page.
    root_url = "https://python.langchain.com/docs/langgraph/"
    loader = RecursiveUrlLoader(url=root_url, max_depth=20, extractor=clean_document)
    documents = loader.load()

    # Sort ascending by the source URL, then reverse the sorted order.
    ordered = sorted(documents, key=lambda doc: doc.metadata["source"])[::-1]

    if dry_run:
        print(_format_docs(ordered))
        return

    # Serialize and write to the package data path.
    DOCS_PATH.write_text(dumps(ordered))
    logger.info("Documents ingested.")


def _format_docs(docs):
return "\n\n\n --- \n\n\n".join([doc.page_content for doc in docs])


@functools.lru_cache
def load_docs() -> str:
    """Return the docs corpus as one string, ingesting on first use.

    The result is memoized for the process lifetime. If DOCS_PATH does not
    exist yet, the docs are scraped first. Deserialization warnings from
    langchain's ``loads`` are suppressed.
    """
    if not DOCS_PATH.exists():
        logger.warning("No documents found. Ingesting documents...")
        ingest()

    serialized = DOCS_PATH.read_text()
    # Silence warnings emitted while deserializing the stored documents.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        documents = loads(serialized)

    return _format_docs(documents)
72 changes: 72 additions & 0 deletions langgraph_engineer/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import logging
from pathlib import Path
from typing import List, Optional

import typer
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.utils import image as image_utils
from langgraph.graph import END
from langgraph_engineer import ingest, system
from typing_extensions import Annotated


# Make INFO-level messages (e.g. ingest progress) visible from the CLI.
logging.basicConfig(level=logging.INFO)

# Typer CLI application; `no_args_is_help` prints usage when no command is given.
app = typer.Typer(no_args_is_help=True, add_completion=True)


@app.command(name="create")
def create(
    description: str = typer.Argument(
        ..., help="Description of the application to be created."
    ),
    diagram: Annotated[
        Optional[Path],
        typer.Option(
            help="Path to the image file to be used as the base for the graph"
        ),
    ] = None,
    output: Annotated[
        Optional[Path],
        typer.Option(
            help="Path to the file where the graph should be saved. Default is stdout.",
        ),
    ] = None,
):
    """
    Generate a langgraph app from a description and an optional diagram image.
    """
    graph_ = system.build_graph()
    # Build the message content: the text description is always included;
    # the diagram (as a data URL) is prepended when supplied. Defining
    # `content` unconditionally fixes a NameError on description-only runs,
    # where it was previously assigned only inside the `if diagram:` branch.
    content: list = [{"type": "text", "text": description}]
    if diagram:
        image = image_utils.image_to_data_url(str(diagram))
        content.insert(0, {"type": "image_url", "image_url": image})
    last_chunk = None
    # Stream the graph, echoing each step name as it runs.
    for chunk in graph_.stream(HumanMessage(content=content)):
        typer.echo(f"Running step {next(iter(chunk))}...")
        last_chunk = chunk
    code_content = ""
    if last_chunk:
        # The final chunk holds the message list under the END key; the
        # generated code is the last message's content.
        messages: List[BaseMessage] = last_chunk[END]
        code_content = messages[-1].content
    if output:
        with output.open("w") as f:
            f.write(code_content)
    else:
        typer.echo(code_content)


@app.command(name="ingest")
def ingest_docs(
    # Annotated option style kept consistent with the `create` command.
    dry_run: Annotated[
        bool,
        typer.Option(
            help="Print the ingested documents instead of writing them to file."
        ),
    ] = False,
):
    """
    Scrape the LangGraph documentation and store it for later generation runs.
    """
    # Delegates to ingest.ingest, which crawls the docs site and writes
    # (or, with --dry-run, prints) the serialized documents.
    ingest.ingest(dry_run=dry_run)


# Allow invoking the CLI directly, e.g. `python -m langgraph_engineer.main`.
if __name__ == "__main__":
    app()
Loading

0 comments on commit a0ad4a9

Please sign in to comment.