Skip to content

Commit 86aae73

Browse files
committed
Enhance GitHub RAG application: Introduced retry mechanism for error handling, added new custom exceptions for validation and processing errors, improved session cleanup logic, and refined repository name extraction. Updated logging format for better traceability.
1 parent f21cdcd commit 86aae73

File tree

1 file changed

+81
-13
lines changed

1 file changed

+81
-13
lines changed

github-rag/app.py

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import pandas as pd
66
from typing import Optional, Dict, Any
77
import logging
8+
from functools import wraps
9+
import time
810

911
from gitingest import ingest
1012
from llama_index.core import Settings, PromptTemplate, VectorStoreIndex, SimpleDirectoryReader
@@ -13,51 +15,117 @@
1315
from dotenv import load_dotenv
1416

1517
# Configure logging: each record carries a timestamp, the logger name,
# the severity level and the message text.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Load environment configuration via python-dotenv.
load_dotenv()

# Constants
MAX_REPO_SIZE = 100 * 1024 * 1024  # 100MB
SUPPORTED_REPO_TYPES = ['.py', '.md', '.ipynb', '.js', '.ts', '.json']
MAX_RETRIES = 3  # default number of attempts used by retry_on_error
RETRY_DELAY = 1  # seconds
2431

2532
class GitHubRAGError(Exception):
    """Root of the GitHub RAG application's exception hierarchy."""


class ValidationError(GitHubRAGError):
    """Signals that user-supplied input did not pass validation."""


class ProcessingError(GitHubRAGError):
    """Signals that processing a repository failed."""


class QueryEngineError(GitHubRAGError):
    """Signals that the query engine could not be created or used."""


class SessionError(GitHubRAGError):
    """Signals that managing the session failed."""
51+
52+
def retry_on_error(max_retries=MAX_RETRIES, delay=RETRY_DELAY):
    """Decorator factory that retries the wrapped callable on failure.

    Args:
        max_retries: Total number of attempts (not extra retries). Must be
            at least 1.
        delay: Seconds to sleep between a failed attempt and the next one.

    Returns:
        A decorator. The decorated function returns the result of the first
        successful attempt.

    Raises:
        ValueError: At decoration time, if ``max_retries`` is smaller than 1.
            Previously such a value skipped the call entirely and ended in
            ``raise None`` (a TypeError) on every invocation.
        Exception: Whatever the wrapped callable raised on its final attempt,
            re-raised unchanged with its original traceback.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if attempt == max_retries:
                        # Out of attempts: propagate the last failure as-is.
                        raise
                    logger.warning(f"Attempt {attempt} failed: {str(e)}. Retrying...")
                    time.sleep(delay)
        return wrapper
    return decorator
69+
2970
def validate_github_url(url: str) -> bool:
    """Validate GitHub repository URL.

    Args:
        url: Candidate repository URL.

    Returns:
        True when the URL passes validation. The function never returns
        False; every failure is reported by raising.

    Raises:
        ValidationError: If the URL is empty, is not a string, or does not
            start with 'https://github.com/' or 'http://github.com/'.
    """
    if not url:
        raise ValidationError("Repository URL cannot be empty")
    # A truthy non-string would otherwise escape as AttributeError from
    # .startswith instead of the documented ValidationError.
    if not isinstance(url, str) or not url.startswith(('https://github.com/', 'http://github.com/')):
        raise ValidationError("Invalid GitHub URL format. URL must start with 'https://github.com/' or 'http://github.com/'")
    return True
3277

3378
def get_repo_name(url: str) -> str:
    """Extract repository name from URL.

    The name is the last path segment of the URL, with an optional trailing
    slash ignored and a trailing '.git' suffix removed.

    Args:
        url: Repository URL such as 'https://github.com/owner/repo'.

    Returns:
        The repository name, e.g. 'repo'.

    Raises:
        ValidationError: If the URL is not a string, has fewer than five
            '/'-separated parts, or yields an empty repository name.
    """
    try:
        # Ignore a trailing slash so '.../owner/repo/' still resolves to 'repo'.
        parts = url.rstrip('/').split('/')
    except AttributeError as e:
        # Non-string input (e.g. None) has no string methods.
        raise ValidationError(f"Failed to extract repository name: {str(e)}") from e

    if len(parts) < 5:
        raise ValidationError("Invalid repository URL format")

    repo_name = parts[-1]
    # Strip '.git' only as a suffix; replacing it anywhere would corrupt
    # names that merely contain it (e.g. 'my.gitbook').
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]
    if not repo_name:
        raise ValidationError("Could not extract repository name from URL")
    return repo_name
90+
91+
def cleanup_session():
    """Clean up session resources.

    Empties ``st.session_state.file_cache`` when that attribute exists, then
    runs a garbage-collection pass.

    Raises:
        SessionError: If any step of the cleanup fails; the underlying
            exception is attached as the cause.
    """
    try:
        if hasattr(st.session_state, 'file_cache'):
            # Clearing the dict drops the cache's references to every entry.
            # A per-entry `del` of the loop variable would only unbind a local
            # name, so no separate loop is needed.
            st.session_state.file_cache.clear()
        gc.collect()
        logger.info("Session cleanup completed successfully")
    except Exception as e:
        logger.error(f"Error during session cleanup: {str(e)}")
        raise SessionError(f"Failed to cleanup session: {str(e)}") from e
39106

40107
def reset_chat():
    """Reset chat session and clean up resources.

    Clears the stored messages and context on ``st.session_state`` and then
    delegates to ``cleanup_session()``.

    Raises:
        SessionError: If resetting the state or the cleanup fails; the
            underlying exception is attached as the cause.
    """
    try:
        st.session_state.messages = []
        st.session_state.context = None
        cleanup_session()
        logger.info("Chat session reset successfully")
    except Exception as e:
        logger.error(f"Error resetting chat: {str(e)}")
        # Chain the cause so the original traceback is not lost.
        raise SessionError("Failed to reset chat session") from e
50117

118+
@retry_on_error()
def process_with_gitingets(github_url: str) -> tuple:
    """Process GitHub repository using gitingest.

    Args:
        github_url: URL of the repository passed to ``ingest``.

    Returns:
        A ``(summary, tree, content)`` tuple as produced by ``ingest``.

    Raises:
        ProcessingError: If ``ingest`` raises, or if any of the three
            returned values is empty.
    """
    try:
        summary, tree, content = ingest(github_url)
    except Exception as e:
        logger.error(f"Error processing repository: {str(e)}")
        raise ProcessingError(f"Failed to process repository: {str(e)}") from e

    # Checked outside the try block so this ProcessingError is not caught and
    # re-wrapped, which would double the "Failed to process repository:" prefix.
    if not all([summary, tree, content]):
        message = "Failed to process repository: Missing data"
        logger.error(f"Error processing repository: {message}")
        raise ProcessingError(message)
    return summary, tree, content
61129

62130
def create_query_engine(content_path: str, repo_name: str) -> Any:
63131
"""Create and configure query engine"""
@@ -97,7 +165,7 @@ def create_query_engine(content_path: str, repo_name: str) -> Any:
97165
return query_engine
98166
except Exception as e:
99167
logger.error(f"Error creating query engine: {str(e)}")
100-
raise GitHubRAGError(f"Failed to create query engine: {str(e)}")
168+
raise QueryEngineError(f"Failed to create query engine: {str(e)}")
101169

102170
# Initialize session state
103171
if "id" not in st.session_state:
@@ -147,7 +215,7 @@ def create_query_engine(content_path: str, repo_name: str) -> Any:
147215
st.success("Repository loaded successfully! Ready to chat.")
148216
logger.info(f"Successfully processed repository: {repo_name}")
149217

150-
except GitHubRAGError as e:
218+
except ProcessingError as e:
151219
st.error(str(e))
152220
logger.error(f"Error processing repository {repo_name}: {str(e)}")
153221
st.stop()
@@ -198,7 +266,7 @@ def create_query_engine(content_path: str, repo_name: str) -> Any:
198266
query_engine = st.session_state.file_cache.get(file_key)
199267

200268
if query_engine is None:
201-
raise GitHubRAGError("Please load a repository first!")
269+
raise QueryEngineError("Please load a repository first!")
202270

203271
response = query_engine.query(prompt)
204272

@@ -214,7 +282,7 @@ def create_query_engine(content_path: str, repo_name: str) -> Any:
214282
message_placeholder.markdown(full_response)
215283
st.session_state.messages.append({"role": "assistant", "content": full_response})
216284

217-
except GitHubRAGError as e:
285+
except QueryEngineError as e:
218286
st.error(str(e))
219287
logger.error(f"Error in chat processing: {str(e)}")
220288
except Exception as e:

0 commit comments

Comments
 (0)