Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
*.log
__pycache__
*.graphml
*.json
.env

graph
kv_store
vdb
12 changes: 8 additions & 4 deletions PathRAG/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@
logger,
)

from dotenv import load_dotenv

import sys

load_dotenv()

if sys.version_info < (3, 9):
from typing import AsyncIterator
else:
Expand All @@ -55,8 +59,8 @@ async def openai_complete_if_cache(
prompt,
system_prompt=None,
history_messages=[],
base_url="https://api.openai.com/v1",
api_key="",
base_url=os.getenv("BASE_URL"),
api_key=os.getenv("API_KEY"),
**kwargs,
) -> str:
if api_key:
Expand Down Expand Up @@ -764,8 +768,8 @@ async def zhipu_embedding(
async def openai_embedding(
texts: list[str],
model: str = "text-embedding-3-small",
base_url="https://api.openai.com/v1",
api_key="",
base_url=os.getenv("BASE_URL"),
api_key=os.getenv("API_KEY"),
) -> np.ndarray:
if api_key:
os.environ["OPENAI_API_KEY"] = api_key
Expand Down
9 changes: 9 additions & 0 deletions PathRAG/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Central filesystem settings for PathRAG's persisted stores."""
import os
from pathlib import Path
from dotenv import load_dotenv  # NOTE(review): imported but never called here — presumably callers rely on v1_test.py/llm.py calling load_dotenv(); confirm and either call it or drop the import.

# Repository root: two levels up from this file (PathRAG/settings.py -> repo root).
BASE_DIR = Path(__file__).resolve().parent.parent

# Storage locations, all anchored at the repository root.
# NOTE(review): VDB_FILE__PATH keeps its double underscore on purpose —
# storage.py imports it under exactly this name.
GRAPH_FILE_PATH = str(BASE_DIR / 'graph')
KV_STORE_FILE_PATH = str(BASE_DIR / 'kv_store')
VDB_FILE__PATH = str(BASE_DIR / 'vdb')
15 changes: 7 additions & 8 deletions PathRAG/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
BaseVectorStorage,
)

from .settings import KV_STORE_FILE_PATH
from .settings import GRAPH_FILE_PATH
from .settings import VDB_FILE__PATH


@dataclass
class JsonKVStorage(BaseKVStorage):
def __post_init__(self):
working_dir = self.global_config["working_dir"]
self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
self._file_name = os.path.join(KV_STORE_FILE_PATH, f"kv_store_{self.namespace}.json")
self._data = load_json(self._file_name) or {}
logger.info(f"Load KV {self.namespace} with {len(self._data)} data")

Expand Down Expand Up @@ -68,9 +71,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
cosine_better_than_threshold: float = 0.2

def __post_init__(self):
self._client_file_name = os.path.join(
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
)
self._client_file_name = os.path.join(VDB_FILE__PATH, f"vdb_{self.namespace}.json")
self._max_batch_size = self.global_config["embedding_batch_num"]
self._client = NanoVectorDB(
self.embedding_func.embedding_dim, storage_file=self._client_file_name
Expand Down Expand Up @@ -242,9 +243,7 @@ def _get_edge_key(source: Any, target: Any) -> str:
return fixed_graph

def __post_init__(self):
self._graphml_xml_file = os.path.join(
self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
)
self._graphml_xml_file = os.path.join(GRAPH_FILE_PATH, f"graph_{self.namespace}.graphml")
preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
if preloaded_graph is not None:
logger.info(
Expand Down
51 changes: 24 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,38 @@ The code for the paper **"PathRAG: Pruning Graph-based Retrieval Augmented Gener
## Install
```bash
cd PathRAG
pip install -e .
pip install -e . # or pip install -r requirements.txt
```
## Quick Start
* You can quickly experience this project in the `v1_test.py` file.
* Set OpenAI API key in environment if using OpenAI models: `api_key="sk-...".` in the `v1_test.py` and `llm.py` file
* Prepare your retrieval document "text.txt".
* Use the following Python snippet in the "v1_text.py" file to initialize PathRAG and perform queries.

```python
import os
from PathRAG import PathRAG, QueryParam
from PathRAG.llm import gpt_4o_mini_complete

WORKING_DIR = "./your_working_dir"
api_key="your_api_key"
os.environ["OPENAI_API_KEY"] = api_key
base_url="https://api.openai.com/v1"
os.environ["OPENAI_API_BASE"]=base_url
## RUN the project

### Windows

if not os.path.exists(WORKING_DIR):
os.mkdir(WORKING_DIR)
```bash
python -m venv .venv # create virtual environment
.venv\Scripts\activate # activate the virtual environment
python v1_test.py # to run the project

rag = PathRAG(
working_dir=WORKING_DIR,
llm_model_func=gpt_4o_mini_complete,
)
# if it doesn't work properly, try reinstalling the packages using the installation command above
```

data_file="./text.txt"
question="your_question"
with open(data_file) as f:
rag.insert(f.read())
### Linux/Unix

print(rag.query(question, param=QueryParam(mode="hybrid")))
```bash
python3 -m venv .venv # create virtual environment
source .venv/bin/activate # activate the virtual environment
python3 v1_test.py # to run the project

# if it doesn't work properly, try reinstalling the packages using the installation command above
```

## Quick Start
* You can quickly experience this project in the `v1_test.py` file.
* Rename `example.env` to `.env`
* Set OpenAI API key in `.env` file and the BASE URL.
* Prepare your retrieval document `text.txt`. You can modify this in the code in `v1_test.py`.
* The `v1_test.py` file is the entry point to initialize PathRAG and perform queries.

## Parameter modification
You can adjust the relevant parameters in the `base.py` and `operate.py` files.

Expand Down
2 changes: 2 additions & 0 deletions example.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
API_KEY=
BASE_URL=https://api.openai.com/v1
46 changes: 38 additions & 8 deletions v1_test.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,37 @@
import os
from PathRAG import PathRAG, QueryParam
from PathRAG.llm import gpt_4o_mini_complete
from pathlib import Path

WORKING_DIR = ""
from dotenv import load_dotenv

api_key=""
load_dotenv()

WORKING_DIR = "./PathRAG"

# Define storage paths
BASE_DIR = Path(__file__).resolve().parent
GRAPH_FILE_PATH = os.path.join(BASE_DIR, 'graph')
KV_STORE_FILE_PATH = os.path.join(BASE_DIR, 'kv_store')
VDB_FILE_PATH = os.path.join(BASE_DIR, 'vdb')

# Ensure directories exist
for path in [GRAPH_FILE_PATH, KV_STORE_FILE_PATH, VDB_FILE_PATH]:
os.makedirs(path, exist_ok=True)

# Ensure necessary JSON files exist
for file_name in ["kv_store_full_docs.json", "kv_store_text_chunks.json", "kv_store_llm_response_cache.json"]:
file_path = os.path.join(KV_STORE_FILE_PATH, file_name)
if not os.path.exists(file_path):
with open(file_path, "w", encoding="utf-8") as f:
f.write("{}") # Initialize with empty JSON object

# Set up API keys
api_key = os.getenv("API_KEY")
os.environ["OPENAI_API_KEY"] = api_key
base_url="https://api.openai.com/v1"
os.environ["OPENAI_API_BASE"]=base_url
base_url = os.getenv("BASE_URL")
os.environ["OPENAI_API_BASE"] = base_url



if not os.path.exists(WORKING_DIR):
Expand All @@ -18,10 +42,16 @@
llm_model_func=gpt_4o_mini_complete,
)

data_file=""
question=""
with open(data_file) as f:
rag.insert(f.read())
data_file="text.txt"
question="what is this document all about?"

with open(data_file, "r", encoding="utf-8") as f:
file_content = f.read().strip()

if not file_content:
raise ValueError("The input file is empty. Please provide valid content.")

rag.insert(file_content)

print(rag.query(question, param=QueryParam(mode="hybrid")))

Expand Down