55import pandas as pd
66from typing import Optional , Dict , Any
77import logging
8+ from functools import wraps
9+ import time
810
911from gitingest import ingest
1012from llama_index .core import Settings , PromptTemplate , VectorStoreIndex , SimpleDirectoryReader
1315from dotenv import load_dotenv
1416
# Configure logging: INFO and above, each record prefixed with timestamp,
# logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

load_dotenv()
2025
# Constants
MAX_REPO_SIZE = 100 * 1024 ** 2  # 100MB, expressed in bytes
SUPPORTED_REPO_TYPES = [
    '.py',
    '.md',
    '.ipynb',
    '.js',
    '.ts',
    '.json',
]
MAX_RETRIES = 3  # default number of attempts used by retry_on_error
RETRY_DELAY = 1  # seconds
2431
class GitHubRAGError(Exception):
    """Root of the application's exception hierarchy.

    The more specific error classes in this module derive from it, so
    catching ``GitHubRAGError`` handles any of them.
    """
2835
class ValidationError(GitHubRAGError):
    """Signals that user-supplied input failed validation."""
39+
class ProcessingError(GitHubRAGError):
    """Signals that processing a repository failed."""
43+
class QueryEngineError(GitHubRAGError):
    """Signals that creating or using the query engine failed."""
47+
class SessionError(GitHubRAGError):
    """Signals that a session-management operation failed."""
51+
def retry_on_error(max_retries=MAX_RETRIES, delay=RETRY_DELAY):
    """Build a decorator that re-runs the wrapped callable when it raises.

    Args:
        max_retries: Total number of attempts, including the first call.
            Values below 1 are treated as 1 so the callable always runs
            at least once.
        delay: Seconds to sleep between two consecutive attempts.

    Returns:
        A decorator. The decorated function returns the result of the first
        attempt that does not raise.

    Raises:
        Exception: Whatever the final attempt raised is propagated unchanged
            once all attempts are used up.
    """
    # With zero or negative attempts the loop below would never execute and
    # the wrapper would have neither a result nor an exception to surface.
    attempts = max(1, max_retries)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if attempt == attempts:
                        # Out of attempts: re-raise with the original traceback.
                        raise
                    logger.warning(f"Attempt {attempt} failed: {str(e)}. Retrying...")
                    time.sleep(delay)
        return wrapper
    return decorator
69+
def validate_github_url(url: str) -> bool:
    """Check that *url* is non-empty and starts with a GitHub prefix.

    Args:
        url: Repository URL to check.

    Returns:
        True when the URL passes both checks. The function never returns
        False; failures are reported by raising.

    Raises:
        ValidationError: If the URL is empty, or if it starts with neither
            'https://github.com/' nor 'http://github.com/'.
    """
    accepted_prefixes = ('https://github.com/', 'http://github.com/')

    if not url:
        raise ValidationError("Repository URL cannot be empty")

    if url.startswith(accepted_prefixes):
        return True

    raise ValidationError("Invalid GitHub URL format. URL must start with 'https://github.com/' or 'http://github.com/'")
3277
def get_repo_name(url: str) -> str:
    """Extract the repository name from a repository URL.

    The name is the last '/'-separated segment of the URL, with surrounding
    whitespace and trailing slashes ignored and a trailing '.git' removed.

    Args:
        url: Repository URL, e.g. 'https://github.com/owner/repo.git'.

    Returns:
        The repository name, e.g. 'repo'.

    Raises:
        ValidationError: If the URL is not a string-like object, has fewer
            than five '/'-separated segments, or yields an empty name.
    """
    try:
        # Drop trailing slashes so '.../owner/repo/' still resolves to 'repo'.
        parts = url.strip().rstrip('/').split('/')
    except AttributeError as e:
        # Non-string input (e.g. None) has no str methods.
        raise ValidationError(f"Failed to extract repository name: {str(e)}") from e

    # 'https://github.com/owner/repo' splits into exactly five segments.
    if len(parts) < 5:
        raise ValidationError("Invalid repository URL format")

    repo_name = parts[-1]
    # Strip '.git' only as a suffix; names such as 'user.github.io' contain
    # the same characters in the middle and must be left intact.
    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]

    if not repo_name:
        raise ValidationError("Could not extract repository name from URL")
    return repo_name
90+
def cleanup_session():
    """Release the objects cached on ``st.session_state``.

    Empties ``st.session_state.file_cache`` when that attribute exists, then
    triggers a garbage-collection pass and logs the outcome.

    Raises:
        SessionError: If clearing the cache or collecting garbage raises;
            the underlying exception is attached as the cause.
    """
    try:
        if hasattr(st.session_state, 'file_cache'):
            # Clearing the mapping is what drops the references to the cached
            # entries; deleting a loop variable bound to each value would only
            # unbind that local name and free nothing.
            st.session_state.file_cache.clear()
        gc.collect()
        logger.info("Session cleanup completed successfully")
    except Exception as e:
        logger.error(f"Error during session cleanup: {str(e)}")
        raise SessionError(f"Failed to cleanup session: {str(e)}") from e
39106
def reset_chat():
    """Reset the chat state and release cached session resources.

    Sets ``st.session_state.messages`` to an empty list and
    ``st.session_state.context`` to None, then calls ``cleanup_session()``.

    Raises:
        SessionError: If any of the steps above raises; the underlying
            exception is attached as the cause so its traceback is kept.
    """
    try:
        st.session_state.messages = []
        st.session_state.context = None
        cleanup_session()
        logger.info("Chat session reset successfully")
    except Exception as e:
        logger.error(f"Error resetting chat: {str(e)}")
        # Chain the cause: the message alone does not say what went wrong.
        raise SessionError("Failed to reset chat session") from e
50117
@retry_on_error()
def process_with_gitingets(github_url: str) -> tuple:
    """Run ``ingest`` on a repository URL and return its three results.

    Args:
        github_url: URL passed straight through to ``ingest``.

    Returns:
        The ``(summary, tree, content)`` tuple produced by ``ingest``.

    Raises:
        ProcessingError: If ``ingest`` raises (the original exception is
            attached as the cause) or if any of the three results is falsy.
    """
    # Only the external call sits inside the try block, so the "Missing data"
    # error raised below is not caught and wrapped a second time.
    try:
        summary, tree, content = ingest(github_url)
    except Exception as e:
        logger.error(f"Error processing repository: {str(e)}")
        raise ProcessingError(f"Failed to process repository: {str(e)}") from e

    if not all([summary, tree, content]):
        logger.error("Error processing repository: Missing data")
        raise ProcessingError("Failed to process repository: Missing data")
    return summary, tree, content
61129
62130def create_query_engine (content_path : str , repo_name : str ) -> Any :
63131 """Create and configure query engine"""
@@ -97,7 +165,7 @@ def create_query_engine(content_path: str, repo_name: str) -> Any:
97165 return query_engine
98166 except Exception as e :
99167 logger .error (f"Error creating query engine: { str (e )} " )
100- raise GitHubRAGError (f"Failed to create query engine: { str (e )} " )
168+ raise QueryEngineError (f"Failed to create query engine: { str (e )} " )
101169
102170# Initialize session state
103171if "id" not in st .session_state :
@@ -147,7 +215,7 @@ def create_query_engine(content_path: str, repo_name: str) -> Any:
147215 st .success ("Repository loaded successfully! Ready to chat." )
148216 logger .info (f"Successfully processed repository: { repo_name } " )
149217
150- except GitHubRAGError as e :
218+ except ProcessingError as e :
151219 st .error (str (e ))
152220 logger .error (f"Error processing repository { repo_name } : { str (e )} " )
153221 st .stop ()
@@ -198,7 +266,7 @@ def create_query_engine(content_path: str, repo_name: str) -> Any:
198266 query_engine = st .session_state .file_cache .get (file_key )
199267
200268 if query_engine is None :
201- raise GitHubRAGError ("Please load a repository first!" )
269+ raise QueryEngineError ("Please load a repository first!" )
202270
203271 response = query_engine .query (prompt )
204272
@@ -214,7 +282,7 @@ def create_query_engine(content_path: str, repo_name: str) -> Any:
214282 message_placeholder .markdown (full_response )
215283 st .session_state .messages .append ({"role" : "assistant" , "content" : full_response })
216284
217- except GitHubRAGError as e :
285+ except QueryEngineError as e :
218286 st .error (str (e ))
219287 logger .error (f"Error in chat processing: { str (e )} " )
220288 except Exception as e :
0 commit comments