|
| 1 | +import os |
| 2 | +import asyncio |
| 3 | +import logging |
| 4 | +import logging.config |
| 5 | +from lightrag import LightRAG, QueryParam |
| 6 | +from lightrag.llm.openai import gpt_4o_mini_complete |
| 7 | +from lightrag.kg.shared_storage import initialize_pipeline_status |
| 8 | +import json |
| 9 | +from typing import Optional |
| 10 | +from lightrag.utils import logger, set_verbose_debug |
| 11 | + |
#########
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
# import nest_asyncio
# nest_asyncio.apply()
#########

# Working directory for LightRAG's persisted storages (created below if missing).
WORKING_DIR = "./dickens"
| 20 | + |
def configure_logging():
    """Configure console and rotating-file logging for the demo.

    Environment variables:
        LOG_DIR          -- directory for the log file (default: CWD)
        LOG_MAX_BYTES    -- max size of one log file before rotation (default 10MB)
        LOG_BACKUP_COUNT -- number of rotated backups kept (default 5)
        VERBOSE_DEBUG    -- "true" enables LightRAG verbose debug output

    Console output (stderr) uses a terse format; the rotating file handler
    records timestamped, detailed entries for the "lightrag" logger.
    """

    # Reset any existing handlers to ensure clean configuration
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
        logger_instance = logging.getLogger(logger_name)
        logger_instance.handlers = []
        logger_instance.filters = []

    # Get log directory path from environment variable or use current directory
    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag_demo.log"))

    print(f"\nLightRAG demo log file: {log_file_path}\n")
    # BUG FIX: the original called os.makedirs(os.path.dirname(log_dir), ...),
    # which creates the *parent* of LOG_DIR instead of LOG_DIR itself, so the
    # RotatingFileHandler below would fail whenever LOG_DIR did not already
    # exist.  Create the directory that will actually contain the log file.
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Get log file max size and backup count from environment variables
    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(levelname)s: %(message)s",
                },
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    # Don't bubble records up to the root logger (avoids duplicates).
                    "propagate": False,
                },
            },
        }
    )

    # Set the LightRAG utility logger level to INFO
    logger.setLevel(logging.INFO)
    # Enable verbose debug if needed
    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")
| 82 | + |
| 83 | + |
# Create the working directory if needed.  makedirs(..., exist_ok=True) avoids
# the check-then-create race of exists()+mkdir() and also creates any missing
# parent directories.
os.makedirs(WORKING_DIR, exist_ok=True)
| 86 | + |
| 87 | + |
async def initialize_rag(addon_params: Optional[dict] = None):
    """Build a LightRAG instance for WORKING_DIR, initialize its storages
    and the shared pipeline status, and return it.

    Args:
        addon_params: optional per-document-type overrides (e.g. custom
            entity-extraction examples) forwarded to LightRAG.
    """
    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=gpt_4o_mini_complete,
        addon_params=addon_params,
    )

    await rag.initialize_storages()
    await initialize_pipeline_status()

    return rag
| 100 | + |
| 101 | + |
| 102 | +# create file based example, based on following proposed directory structure: |
| 103 | +# my_docs/ |
| 104 | +# └── books/ |
| 105 | +# ├── book1.txt |
| 106 | +# ├── book2.txt |
| 107 | +# └── articles/ |
| 108 | +# ├── article1.txt |
| 109 | +# ├── article2.txt |
| 110 | +#     │       ├── insert_template_prompts.json |
| 111 | +# my_queries/ |
| 112 | +# └── articles/ |
| 113 | +#     └── query_template_prompts.json |
| 114 | +# |
| 115 | +# prompt templates must follow default .utils.prompt.py template_key nomenclature and include same placeholders: |
| 116 | +# arg template_key type expected_placeholder_keys in {} |
| 117 | +# -------------------------------------------------------------------------------------------------- |
| 118 | +# global_config "language" str - |
| 119 | +# global_config "tuple_delimiter" str - |
| 120 | +# global_config "record_delimiter" str - |
| 121 | +# global_config "completion_delimiter" str - |
| 122 | +# global_config "similarity_check" str original_prompt,cached_prompt |
| 123 | +# -- |
| 124 | +# global_config "summarize_entity_descriptions" str language,entity_name,description_list |
| 125 | +# global_config "entity_extraction_examples" str tuple_delimiter,record_delimiter,completion_delimiter |
| 126 | +# global_config "entity_types" list[str] - |
| 127 | +# global_config "entity_extraction" str language,entity_types,tuple_delimiter,record_delimiter,completion_delimiter,examples,input_text |
| 128 | +# global_config "entity_continue_extraction" str entity_types,tuple_delimiter,language,record_delimiter,completion_delimiter |
| 129 | +# global_config "entity_if_loop_extraction" str - |
| 130 | +# global_config "keywords_extraction" str examples,history,query,language |
| 131 | +# global_config "keywords_extraction_examples" str - |
| 132 | +# -- |
| 133 | +# query_param "rag_response" str history,content_data,response_type |
| 134 | +# query_param "naive_rag_response" str history,content_data,response_type |
| 135 | +# query_param "mix_rag_response" str history,kg_context,vector_context,response_type |
| 136 | +# query_param "fail_rag_response" str - |
| 137 | + |
| 138 | + |
# Write the per-doc-type prompt template files consumed below.
# FIX: the original passed bare open(...) handles to json.dump, leaking the
# file descriptors and risking unflushed data; it also assumed the target
# directories already existed.  Create them and use context managers.
os.makedirs("./my_docs/articles", exist_ok=True)
os.makedirs("./my_queries/articles", exist_ok=True)

with open("./my_docs/articles/insert_template_prompts.json", "w", encoding="utf-8") as f:
    json.dump(
        {"entity_extraction_examples": ["device", "make", "model", "publication", "date"]},
        f,
    )
with open("./my_queries/articles/query_template_prompts.json", "w", encoding="utf-8") as f:
    json.dump({"rag_response": "System prompt specific to articles..."}, f)
| 147 | + |
# Load the article-specific templates from disk.
# FIX: the original embedded json.load(open(...)) in the dict literal, leaking
# the file handles; read them first with context managers.
with open("./my_docs/articles/insert_template_prompts.json", "r", encoding="utf-8") as _f:
    _article_addon_params = json.load(_f)
with open("./my_queries/articles/query_template_prompts.json", "r", encoding="utf-8") as _f:
    _article_system_prompts = json.load(_f)

# Per-document-type configuration: source files, insertion-time addon_params,
# and query-time system prompts.
docs = {
    "books": {
        "file_paths": ["./books/book1.txt", "./books/book2.txt"],
        "addon_params": {
            "entity_extraction_examples": ["organization", "person", "location"],
        },
        "system_prompts": {
            "rag_response": "KG mode system prompt specific to books...",
            "naive_rag_response": "Naive mode system prompt specific to books...",
            "mix_rag_response": "Mix mode system prompt specific to books...",
        },
    },
    "articles": {
        "file_paths": ["./articles/article1.txt", "./articles/article2.txt"],
        "addon_params": _article_addon_params,
        "system_prompts": _article_system_prompts,
    },
}
| 170 | + |
| 171 | + |
def get_content(file_paths):
    """Return the full text of each file in *file_paths*, in order."""

    def read_text(path):
        # Files are assumed to be UTF-8 text, matching how they are written.
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    return [read_text(path) for path in file_paths]
| 178 | + |
| 179 | + |
async def main():
    """Demo driver.

    Phase 1: for each document type in ``docs``, build a RAG instance with
    that type's addon_params and insert its files.
    Phase 2: build a fresh instance (default addon_params) and run a naive
    query with the books system prompt and a hybrid query with the articles
    system prompt.
    """
    for doc_type, doc_info in docs.items():
        # Insert differently per doc type
        file_paths = doc_info["file_paths"]
        addon_params = doc_info["addon_params"]

        # Initialize the RAG instance for each document type
        print("\n=====================")
        print(f"Initializing RAG for {doc_type}")
        print(f"Inserting with custom {addon_params}")
        print("=====================")

        # BUG FIX: reset before each iteration — otherwise, if initialize_rag
        # raises for a later doc type, the finally block would re-finalize the
        # previous (already finalized) instance.
        rag = None
        try:
            rag = await initialize_rag(addon_params)

            contents = get_content(file_paths)
            await rag.ainsert(contents, file_paths=file_paths)
        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            if rag:
                await rag.finalize_storages()

    rag = None
    addon_params = None
    try:
        # Query phase uses default addon_params.
        rag = await initialize_rag(addon_params)
        # Perform naive search
        # for specific to `books` type queries
        print("\n=====================")
        print("Query mode: naive")
        print("=====================")
        print(
            await rag.aquery(
                "What are the top themes in this story?",
                param=QueryParam(mode="naive"),
                system_prompt=docs["books"]["system_prompts"][
                    "naive_rag_response"
                ],  # Use the naive mode specific system prompt for book concepts
            )
        )
        # Perform hybrid search
        # for specific to `articles` type queries
        print("\n=====================")
        print("Query mode: hybrid")
        print("=====================")
        print(
            await rag.aquery(
                "What are the top themes in this story?",
                param=QueryParam(mode="hybrid"),
                system_prompt=docs["articles"]["system_prompts"][
                    "rag_response"
                ],  # Use the hybrid mode specific system prompt for article concepts
            )
        )
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        if rag:
            await rag.finalize_storages()
| 240 | + |
| 241 | + |
if __name__ == "__main__":
    # Configure logging before running the main function so the handlers are
    # in place for everything LightRAG logs during ingestion and querying.
    configure_logging()
    asyncio.run(main())
    print("\nDone!")
0 commit comments