Showing 7 changed files with 200 additions and 6 deletions.
@@ -0,0 +1,60 @@
# RAG Local CLI Pack

This LlamaPack implements a fully local version of our [RAG CLI](https://docs.llamaindex.ai/en/stable/use_cases/q_and_a/rag_cli.html),
with Mistral (through Ollama) and [BGE-M3](https://huggingface.co/BAAI/bge-m3).
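Since the pack runs everything locally, you need a local Ollama server with the Mistral model available before starting the CLI. A minimal setup sketch, assuming Ollama is already installed and running:

```bash
# pull the Mistral weights so the local Ollama server can serve them
ollama pull mistral
```

The BGE-M3 embedding model is downloaded from Hugging Face automatically the first time the pack initializes it.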
## CLI Usage

You can download llamapacks directly using `llamaindex-cli`, which comes installed with the `llama-index` python package:

```bash
llamaindex-cli download-llamapack LocalRAGCLIPack --download-dir ./local_rag_cli_pack
```

You can then inspect the files at `./local_rag_cli_pack` and use them as a template for your own project!

## Code Usage

You can download the pack to a directory. **NOTE**: You must specify `skip_load=True` - the pack contains multiple files,
which makes it hard to load directly.

We will show you how to import and use the pack from these files!

```python
from llama_index.llama_pack import download_llama_pack

# download and install dependencies
download_llama_pack(
    "LocalRAGCLIPack", "./local_rag_cli_pack", skip_load=True
)
```

From here, you can use the pack. The most straightforward way is through the CLI. You can directly run `base.py`, or run the `setup_cli.sh` script.

```bash
cd local_rag_cli_pack

# option 1
python base.py rag -h

# option 2 - you may need sudo
# default name is lcli_local
sudo sh setup_cli.sh
lcli_local rag -h
```

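Once the CLI is installed, a typical session ingests some local files and then asks questions over them. A sketch using the standard llama-index RAG CLI flags (the file path below is only an example; run `lcli_local rag -h` to confirm the options available in your install):

```bash
# ingest a local file into the persisted index and ask a question over it
lcli_local rag --files "./data/notes.md" --question "What do the notes say about deployment?"

# or open an interactive chat REPL over everything ingested so far
lcli_local rag --chat
```
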
You can also directly get modules from the pack.

```python
from local_rag_cli_pack.base import LocalRAGCLIPack

pack = LocalRAGCLIPack(
    verbose=True, llm_model_name="mistral", embed_model_name="BAAI/bge-m3"
)
# will spin up the CLI
pack.run()

# get modules
rag_cli = pack.get_modules()["rag_cli"]
rag_cli.cli()
```
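Because the constructor also exposes `persist_dir`, you can keep separate indexes for separate projects, and the model names can be swapped for any other Ollama chat model and Hugging Face embedding model. A sketch with illustrative values (the directory and model names below are examples, not the pack's defaults):

```python
from local_rag_cli_pack.base import LocalRAGCLIPack

# persist_dir and model names here are illustrative, not the pack's defaults
pack = LocalRAGCLIPack(
    verbose=True,
    persist_dir="./my_rag_storage",
    llm_model_name="llama2",
    embed_model_name="BAAI/bge-small-en-v1.5",
)
pack.run()
```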
@@ -0,0 +1,117 @@
"""Local RAG CLI Pack.""" | ||
|
||
from llama_index.ingestion import IngestionPipeline, IngestionCache | ||
from llama_index.query_pipeline.query import QueryPipeline | ||
from llama_index.storage.docstore import SimpleDocumentStore | ||
from llama_index.command_line.rag import RagCLI | ||
from llama_index.text_splitter import SentenceSplitter | ||
from llama_index.embeddings import HuggingFaceEmbedding | ||
from llama_index.llms import Ollama | ||
from llama_index.vector_stores import ChromaVectorStore | ||
from llama_index.utils import get_cache_dir | ||
from llama_index import ServiceContext, VectorStoreIndex | ||
from llama_index.response_synthesizers import CompactAndRefine | ||
from llama_index.query_pipeline import InputComponent | ||
from llama_index.llama_pack.base import BaseLlamaPack | ||
from typing import Optional, Dict, Any | ||
from pathlib import Path | ||
import chromadb | ||
|
||
|
||
def default_ragcli_persist_dir() -> str: | ||
"""Get default RAG CLI persist dir.""" | ||
return str(Path(get_cache_dir()) / "rag_cli_local") | ||
|
||
|
||
def init_local_rag_cli( | ||
persist_dir: Optional[str] = None, | ||
verbose: bool = False, | ||
llm_model_name: str = "mistral", | ||
embed_model_name: str = "BAAI/bge-m3", | ||
) -> RagCLI: | ||
"""Init local RAG CLI.""" | ||
|
||
docstore = SimpleDocumentStore() | ||
persist_dir = persist_dir or default_ragcli_persist_dir() | ||
chroma_client = chromadb.PersistentClient(path=persist_dir) | ||
chroma_collection = chroma_client.create_collection("default", get_or_create=True) | ||
vector_store = ChromaVectorStore( | ||
chroma_collection=chroma_collection, persist_dir=persist_dir | ||
) | ||
print("> Chroma collection initialized") | ||
llm = Ollama(model=llm_model_name, request_timeout=30.0) | ||
print("> LLM initialized") | ||
embed_model = HuggingFaceEmbedding(model_name=embed_model_name) | ||
print("> Embedding model initialized") | ||
|
||
ingestion_pipeline = IngestionPipeline( | ||
transformations=[SentenceSplitter(), embed_model], | ||
vector_store=vector_store, | ||
docstore=docstore, | ||
cache=IngestionCache(), | ||
) | ||
|
||
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model) | ||
retriever = VectorStoreIndex.from_vector_store( | ||
ingestion_pipeline.vector_store, service_context=service_context | ||
).as_retriever(similarity_top_k=8) | ||
response_synthesizer = CompactAndRefine( | ||
service_context=service_context, streaming=True, verbose=True | ||
) | ||
# define query pipeline | ||
query_pipeline = QueryPipeline(verbose=verbose) | ||
query_pipeline.add_modules( | ||
{ | ||
"input": InputComponent(), | ||
"retriever": retriever, | ||
"summarizer": response_synthesizer, | ||
} | ||
) | ||
query_pipeline.add_link("input", "retriever") | ||
query_pipeline.add_link("retriever", "summarizer", dest_key="nodes") | ||
query_pipeline.add_link("input", "summarizer", dest_key="query_str") | ||
|
||
rag_cli_instance = RagCLI( | ||
ingestion_pipeline=ingestion_pipeline, | ||
llm=llm, # optional | ||
persist_dir=persist_dir, | ||
query_pipeline=query_pipeline, | ||
verbose=False, | ||
) | ||
return rag_cli_instance | ||
|
||
|
||
class LocalRAGCLIPack(BaseLlamaPack): | ||
"""Local RAG CLI Pack.""" | ||
|
||
def __init__( | ||
self, | ||
verbose: bool = False, | ||
persist_dir: Optional[str] = None, | ||
llm_model_name: str = "mistral", | ||
embed_model_name: str = "BAAI/bge-m3", | ||
) -> None: | ||
"""Init params.""" | ||
self.verbose = verbose | ||
self.persist_dir = persist_dir or default_ragcli_persist_dir() | ||
self.llm_model_name = llm_model_name | ||
self.embed_model_name = embed_model_name | ||
self.rag_cli = init_local_rag_cli( | ||
persist_dir=self.persist_dir, | ||
verbose=self.verbose, | ||
llm_model_name=self.llm_model_name, | ||
embed_model_name=self.embed_model_name, | ||
) | ||
|
||
def get_modules(self) -> Dict[str, Any]: | ||
"""Get modules.""" | ||
return {"rag_cli": self.rag_cli} | ||
|
||
def run(self, *args: Any, **kwargs: Any) -> Any: | ||
"""Run the pipeline.""" | ||
return self.rag_cli.cli(*args, **kwargs) | ||
|
||
|
||
if __name__ == "__main__": | ||
rag_cli_instance = init_local_rag_cli() | ||
rag_cli_instance.cli() |