Skip to content

Commit 01aee34

Browse files
committed
fix linting
1 parent db20f53 commit 01aee34

File tree

1 file changed

+246
-0
lines changed

1 file changed

+246
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
import os
2+
import asyncio
3+
import logging
4+
import logging.config
5+
from lightrag import LightRAG, QueryParam
6+
from lightrag.llm.openai import gpt_4o_mini_complete
7+
from lightrag.kg.shared_storage import initialize_pipeline_status
8+
import json
9+
from typing import Optional
10+
from lightrag.utils import logger, set_verbose_debug
11+
12+
#########
13+
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
14+
# import nest_asyncio
15+
# nest_asyncio.apply()
16+
#########
17+
18+
# Directory handed to LightRAG as `working_dir`; it is created at import time
# further down in this script if it does not exist yet.
WORKING_DIR = "./dickens"
19+
20+
21+
def configure_logging():
    """Configure console and rotating-file logging for the LightRAG demo.

    Existing handlers and filters on the uvicorn and lightrag loggers are
    dropped first, then the "lightrag" logger is wired to a stderr handler
    and a rotating file handler writing ``lightrag_demo.log``.

    Environment variables read:
        LOG_DIR: directory for the log file (default: current working directory).
        LOG_MAX_BYTES: rotation threshold in bytes (default: 10485760, i.e. 10MB).
        LOG_BACKUP_COUNT: number of rotated files kept (default: 5).
        VERBOSE_DEBUG: "true" (case-insensitive) is passed on as True to
            ``set_verbose_debug``; anything else as False.

    Raises:
        ValueError: if LOG_MAX_BYTES or LOG_BACKUP_COUNT is not an integer.
    """
    # Reset any existing handlers to ensure clean configuration
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
        logger_instance = logging.getLogger(logger_name)
        logger_instance.handlers = []
        logger_instance.filters = []

    # Get log directory path from environment variable or use current directory
    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag_demo.log"))

    print(f"\nLightRAG demo log file: {log_file_path}\n")
    # Create the directory that will actually contain the log file. Using the
    # dirname of the absolute file path (rather than the dirname of LOG_DIR)
    # handles both relative values such as "logs" and not-yet-existing targets.
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Get log file max size and backup count from environment variables
    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(levelname)s: %(message)s",
                },
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    "propagate": False,
                },
            },
        }
    )

    # Set the logger level to INFO
    logger.setLevel(logging.INFO)
    # Forward the VERBOSE_DEBUG environment switch to LightRAG
    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
82+
83+
84+
# Make sure the LightRAG working directory exists before any instance is created.
# makedirs(exist_ok=True) avoids the check-then-create race of exists() + mkdir().
os.makedirs(WORKING_DIR, exist_ok=True)
86+
87+
88+
async def initialize_rag(addon_params: Optional[dict] = None):
    """Create a LightRAG instance on WORKING_DIR and initialize its storages.

    Args:
        addon_params: Optional settings forwarded as ``LightRAG(addon_params=...)``.
            When ``None`` the keyword is left out entirely, so LightRAG falls
            back to its own default instead of receiving an explicit ``None``.

    Returns:
        The LightRAG instance, after ``initialize_storages()`` and
        ``initialize_pipeline_status()`` have both been awaited.
    """
    rag_kwargs = {
        "working_dir": WORKING_DIR,
        "llm_model_func": gpt_4o_mini_complete,
    }
    # Only override the library default when the caller supplied a value.
    if addon_params is not None:
        rag_kwargs["addon_params"] = addon_params
    rag = LightRAG(**rag_kwargs)

    await rag.initialize_storages()
    await initialize_pipeline_status()

    return rag
100+
101+
102+
# create file based example, based on following proposed directory structure:
103+
# my_docs/
104+
# └── books/
105+
# ├── book1.txt
106+
# ├── book2.txt
107+
# └── articles/
108+
# ├── article1.txt
109+
# ├── article2.txt
110+
# ├── insert_template_prompts.json
111+
# my_queries/
112+
# └── articles/
113+
# └── query_template_prompts.json
114+
#
115+
# prompt templates must follow default .utils.prompt.py template_key nomenclature and include same placeholders:
116+
# arg template_key type expected_placeholder_keys in {}
117+
# --------------------------------------------------------------------------------------------------
118+
# global_config "language" str -
119+
# global_config "tuple_delimiter" str -
120+
# global_config "record_delimiter" str -
121+
# global_config "completion_delimiter" str -
122+
# global_config "similarity_check" str original_prompt,cached_prompt
123+
# --
124+
# global_config "summarize_entity_descriptions" str language,entity_name,description_list
125+
# global_config "entity_extraction_examples" str tuple_delimiter,record_delimiter,completion_delimiter
126+
# global_config "entity_types" list[str] -
127+
# global_config "entity_extraction" str language,entity_types,tuple_delimiter,record_delimiter,completion_delimiter,examples,input_text
128+
# global_config "entity_continue_extraction" str entity_types,tuple_delimiter,language,record_delimiter,completion_delimiter
129+
# global_config "entity_if_loop_extraction" str -
130+
# global_config "keywords_extraction" str examples,history,query,language
131+
# global_config "keywords_extraction_examples" str -
132+
# --
133+
# query_param "rag_response" str history,content_data,response_type
134+
# query_param "naive_rag_response" str history,content_data,response_type
135+
# query_param "mix_rag_response" str history,kg_context,vector_context,response_type
136+
# query_param "fail_rag_response" str -
137+
138+
139+
def _write_json(path, payload):
    """Serialize `payload` as JSON to `path`, creating parent directories first."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # The context manager guarantees the data is flushed and the handle closed
    # before the file is read back below.
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle)


def _read_json(path):
    """Return the parsed JSON content of `path`."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)


# Template files for the `articles` document type: one consumed at insert
# time (addon_params) and one at query time (system prompts).
_INSERT_TEMPLATES_PATH = "./my_docs/articles/insert_template_prompts.json"
_QUERY_TEMPLATES_PATH = "./my_queries/articles/query_template_prompts.json"

# Write the example template files so the demo is self-contained.
# NOTE(review): the values below look like entity type names, while the table
# in the comment above lists "entity_extraction_examples" as a str and
# "entity_types" as list[str] -- confirm which key is intended.
_write_json(
    _INSERT_TEMPLATES_PATH,
    {"entity_extraction_examples": ["device", "make", "model", "publication", "date"]},
)
_write_json(
    _QUERY_TEMPLATES_PATH,
    {"rag_response": "System prompt specific to articles..."},
)

# Per-document-type configuration: the files to insert, the addon_params used
# while inserting, and the system prompts used while querying. `books` is
# configured inline; `articles` is loaded from the JSON files written above.
docs = {
    "books": {
        "file_paths": ["./books/book1.txt", "./books/book2.txt"],
        "addon_params": {
            "entity_extraction_examples": ["organization", "person", "location"],
        },
        "system_prompts": {
            "rag_response": "KG mode system prompt specific to books...",
            "naive_rag_response": "Naive mode system prompt specific to books...",
            "mix_rag_response": "Mix mode system prompt specific to books...",
        },
    },
    "articles": {
        "file_paths": ["./articles/article1.txt", "./articles/article2.txt"],
        "addon_params": _read_json(_INSERT_TEMPLATES_PATH),
        "system_prompts": _read_json(_QUERY_TEMPLATES_PATH),
    },
}
170+
171+
172+
def get_content(file_paths):
    """Read every file in `file_paths` as UTF-8 text.

    Returns a list with one string per path, in the same order as the input.
    """

    def _slurp(path):
        # Read one file completely and close it again.
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    return [_slurp(path) for path in file_paths]
178+
179+
180+
async def main():
    """Insert every document type with its own addon_params, then run two queries.

    Phase 1 iterates over `docs`: for each type a fresh RAG instance is
    created with that type's addon_params, the listed files are read and
    inserted, and the instance's storages are finalized.

    Phase 2 creates one more instance without addon_params and issues a
    naive-mode query using the `books` system prompt and a hybrid-mode query
    using the `articles` system prompt.

    Errors in either phase are printed rather than raised, so the demo keeps
    going with the next step.
    """
    for doc_type, doc_info in docs.items():
        # Insert differently per doc type
        file_paths = doc_info["file_paths"]
        addon_params = doc_info["addon_params"]

        # Initialize the RAG instance for each document type
        print("\n=====================")
        print(f"Initializing RAG for {doc_type}")
        print(f"Inserting with custom {addon_params}")
        print("=====================")
        # Reset per iteration: if initialize_rag() fails, `rag` must not still
        # point at the previous iteration's instance, which was already
        # finalized and would otherwise be finalized a second time.
        rag = None
        try:
            rag = await initialize_rag(addon_params)

            contents = get_content(file_paths)
            await rag.ainsert(contents, file_paths=file_paths)
        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            if rag is not None:
                await rag.finalize_storages()

    rag = None
    addon_params = None
    try:
        rag = await initialize_rag(addon_params)
        # Perform naive search
        # for specific to `books` type queries
        print("\n=====================")
        print("Query mode: naive")
        print("=====================")
        print(
            await rag.aquery(
                "What are the top themes in this story?",
                param=QueryParam(mode="naive"),
                system_prompt=docs["books"]["system_prompts"][
                    "naive_rag_response"
                ],  # Use the naive mode specific system prompt for book concepts
            )
        )
        # Perform hybrid search
        # for specific to `articles` type queries
        print("\n=====================")
        print("Query mode: hybrid")
        print("=====================")
        print(
            await rag.aquery(
                "What are the top themes in this story?",
                param=QueryParam(mode="hybrid"),
                system_prompt=docs["articles"]["system_prompts"][
                    "rag_response"
                ],  # Use the hybrid mode specific system prompt for article concepts
            )
        )
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if rag is not None:
            await rag.finalize_storages()
240+
241+
242+
if __name__ == "__main__":
    # Logging has to be in place before the demo coroutine starts running.
    configure_logging()
    demo = main()
    asyncio.run(demo)
    print("\nDone!")

0 commit comments

Comments
 (0)