Commit bd5a972

files are updated
varunsalunkhe committed Jan 6, 2025
1 parent 5ee646d commit bd5a972
Showing 3 changed files with 35 additions and 43 deletions.
59 changes: 19 additions & 40 deletions app.py
@@ -1,6 +1,7 @@
 import os
 import streamlit as st
-from langchain.vectorstores import Chroma
+import chromadb
+from langchain.vectorstores import FAISS, Chroma
 from langchain.prompts import ChatPromptTemplate
 from langchain import HuggingFaceHub
 from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain, ConversationalRetrievalChain
@@ -10,28 +11,30 @@
 # Streamlit interface for uploading PDF
 st.title("RAG-Based Chat Bot 🤖")
 uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])
+main_placeholder = st.empty()
 
 if uploaded_file is not None:
     # Step 1: Load PDF content
     pdf_content = load_pdf(uploaded_file)
-    st.write("PDF loaded successfully!")
+    main_placeholder.write("PDF loaded successfully!")
 
     # Step 2: Split the text into chunks
     documents = split_text(pdf_content)
-    st.write(f"Text split into {len(documents)} chunks.")
+    # st.write(f"Text split into {len(documents)} chunks.")
 
     # Step 3: Generate embeddings for the documents
     embeddings = generate_embeddings()  # Pass documents to the function
-    st.write("Embeddings generated for the documents.")
+    # st.write("Embeddings generated for the documents.")
 
     # Step 4: Save the vectorized documents to ChromaDB
     vector_db = save_to_vectordb(documents, embeddings)
-    st.success("Documents are vectorized and saved to ChromaDB.")
+    # st.success("Documents are vectorized and saved to ChromaDB.")
 
     # Step 5: Create the HuggingFace model
-    st.write("Attempting to create the HuggingFace model...")
+    # st.write("Attempting to create the HuggingFace model...")
     model = create_huggingface_model()
-    st.success("Model created successfully!")
+    # st.success("Model created successfully!")
 
 
 
     retriever = vector_db.as_retriever()
@@ -47,52 +50,28 @@
     prompt = ChatPromptTemplate.from_template(template)
 
     # Defining a QnA chain
-    QnA = ConversationalRetrievalChain.from_llm(llm = model,
+    QnA = RetrievalQA.from_chain_type(llm = model,
                                         chain_type = 'stuff',
                                         retriever = retriever,
                                         verbose = False)
-    st.success("RAG pipeline created successfully!")
-
+    # st.success("RAG pipeline created successfully!")
 
     # User Input for Questions
     question = st.text_input("Ask a question")
 
     def get_answers(QnA, query):
-        answer = QnA.run(query)
-        # print(f"\033[1mQuery:\033[0m {query}\n")
-        return (f"\033[1mAnswer:\033[0m ", answer)
+        response = QnA.run(query)
+        return response
+        # if "Answer:" in response:
+        #     answer = response.split("Answer:", 1)[1].strip()
+        #     return answer
 
     if question:
         with st.spinner("Processing your question..."):
             answer = get_answers(QnA, question)
-            st.write("### Answer:")
+            # st.write("### Answer:")
             st.write(answer)
 
 else:
     st.warning("Please upload a PDF file to proceed.")
-
-
-# def process_rag_response(question):
-
-#     try:
-#         # Invoke the RAG pipeline directly with the question
-#         response = rag_chain.invoke(question)
-
-#         # Ensure response is a string
-#         if not isinstance(response, str):
-#             response = str(response)
-
-#         # Check if the response contains "Answer:" and extract it
-#         if "Answer:" in response:
-#             answer = response.split("Answer:", 1)[1].strip()  # Split and extract the answer
-#             return answer
-#         else:
-#             return "Sorry, I didn’t understand your question. Do you want to connect with a live agent?"
-
-#     except Exception as e:
-#         # Handle any exceptions that occur
-#         import traceback
-#         st.error(f"An error occurred: {e}")
-#         print(traceback.format_exc())  # Log the error for debugging
-#         return "Sorry, there was an error processing your request."
-
-# Handle user input and display the answer
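
The substantive app.py change is the swap from ConversationalRetrievalChain to RetrievalQA. Below is a minimal sketch of the resulting pipeline, not the verbatim file: `documents`, `embeddings`, and `model` stand in for the values returned by split_text(), generate_embeddings(), and create_huggingface_model() in modules.py.

# Illustrative sketch of the post-commit app.py flow (assumes `documents`,
# `embeddings`, and `model` already exist via the modules.py helpers).
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

vector_db = Chroma.from_documents(documents, embeddings)  # what save_to_vectordb() now does
retriever = vector_db.as_retriever()

# RetrievalQA replaces ConversationalRetrievalChain: single-turn Q&A with no
# chat history; chain_type='stuff' packs all retrieved chunks into one prompt.
QnA = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=retriever,
    verbose=False,
)

answer = QnA.run("What is this PDF about?")  # a plain query string is all it needs

This is also why the simplified get_answers() works: RetrievalQA.run() accepts a single query string, whereas ConversationalRetrievalChain expects both a question and a chat_history input, so the old single-argument run() call would have raised an error at query time.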
7 changes: 4 additions & 3 deletions modules.py
@@ -5,7 +5,8 @@
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.llms import CTransformers
 import faiss
-from langchain.vectorstores import FAISS
+import chromadb
+from langchain.vectorstores import FAISS, Chroma
 import tempfile
 
 def load_pdf(uploaded_file):
@@ -37,14 +38,14 @@ def save_to_vectordb(docs, embeddings):
"""
Saves the vectorized documents to Weaviate vector database.
"""
vector_db = FAISS.from_documents(
vector_db = Chroma.from_documents(
docs,
embeddings
)
return vector_db

# Function to initialize the HuggingFace model
def create_huggingface_model(temperature: float = 1.0, context_length: int = 1000, max_new_tokens= 600):
def create_huggingface_model(temperature: float = 0, context_length: int = 4096, max_new_tokens= 2048):

try:
llm = CTransformers(model='TheBloke/Llama-2-7B-Chat-GGML',
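
In modules.py, save_to_vectordb() now builds the index with Chroma.from_documents instead of FAISS.from_documents, and create_huggingface_model() gets tighter defaults (deterministic output, full Llama-2 context window). The diff is truncated at the CTransformers(...) call, so the config wiring below is an assumption based on standard ctransformers options, not the repository's exact code; the persist_directory argument is likewise not in the original and is shown only as a common Chroma option.

# Hedged sketch of the updated modules.py helpers:
from langchain.vectorstores import Chroma
from langchain.llms import CTransformers

def save_to_vectordb(docs, embeddings):
    """Saves the vectorized documents to a ChromaDB collection (in-memory by default)."""
    return Chroma.from_documents(
        docs,
        embeddings,
        # persist_directory="./chroma_db",  # assumption: uncomment to keep the index on disk
    )

def create_huggingface_model(temperature: float = 0, context_length: int = 4096, max_new_tokens=2048):
    # temperature=0 makes answers deterministic; context_length=4096 matches
    # Llama-2's full context window (the old 1000 truncated long prompts).
    return CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        config={  # assumption: the exact kwargs are cut off in the diff
            "temperature": temperature,
            "context_length": context_length,
            "max_new_tokens": max_new_tokens,
        },
    )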
12 changes: 12 additions & 0 deletions requirements.txt
@@ -0,0 +1,12 @@
+chromadb==0.6.1
+ctransformers==0.2.27
+faiss-cpu==1.9.0.post1
+huggingface-hub==0.27.0
+langchain==0.3.13
+langchain-community==0.3.13
+langchain-core==0.3.28
+langchain-text-splitters==0.3.4
+pypdf==5.1.0
+sentence-transformers==3.3.1
+streamlit==1.41.1
+transformers==4.47.1
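
The new requirements.txt pins the whole stack, so `pip install -r requirements.txt` should reproduce the environment. One caveat worth noting: with langchain pinned at 0.3.13, the legacy `from langchain.vectorstores import ...` style used in app.py and modules.py appears to resolve through the pinned langchain-community 0.3.13 via deprecation shims, so the imports should still load, albeit with deprecation warnings.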
