Create app.py

aimaster-dev · aimaster-dev · commit 87d92b5dbe9b · 2023-10-26T14:23:41.000+02:00
diff --git a/app.py b/app.py
@@ -0,0 +1,115 @@
+# Import necessary libraries
+import databutton as db
+import streamlit as st
+import openai
+from my_pdf_lib import get_index_for_pdf
+from langchain.chains import RetrievalQA
+from langchain.chat_models import ChatOpenAI
+import os
+
+# Set the title for the Streamlit app
+st.title("RAG enhanced Chatbot")
+
+# Set up the OpenAI API key from databutton secrets
+os.environ["OPENAI_API_KEY"] = db.secrets.get("OPENAI_API_KEY")
+openai.api_key = db.secrets.get("OPENAI_API_KEY")
+
+
+# Cached function to create a vectordb for the provided PDF files
+@st.cache_data
+def create_vectordb(files, filenames):
+    # Show a spinner while creating the vectordb
+    with st.spinner("Vector database"):
+        vectordb = get_index_for_pdf(
+            [file.getvalue() for file in files], filenames, openai.api_key
+        )
+    return vectordb
+
+
+# Upload PDF files using Streamlit's file uploader
+pdf_files = st.file_uploader("", type="pdf", accept_multiple_files=True)
+
+# If PDF files are uploaded, create the vectordb and store it in the session state
+if pdf_files:
+    pdf_file_names = [file.name for file in pdf_files]
+    st.session_state["vectordb"] = create_vectordb(pdf_files, pdf_file_names)
+
+# Define the template for the chatbot prompt
+prompt_template = """
+    You are a helpful Assistant who answers to users questions based on multiple contexts given to you.
+
+    Keep your answer short and to the point.
+    
+    The evidence are the context of the pdf extract with metadata. 
+    
+    Carefully focus on the metadata specially 'filename' and 'page' whenever answering.
+    
+    Make sure to add filename and page number at the end of sentence you are citing to.
+        
+    Reply "Not applicable" if text is irrelevant.
+     
+    The PDF content is:
+    {pdf_extract}
+"""
+
+# Get the current prompt from the session state or set a default value
+prompt = st.session_state.get("prompt", [{"role": "system", "content": "none"}])
+
+# Display previous chat messages
+for message in prompt:
+    if message["role"] != "system":
+        with st.chat_message(message["role"]):
+            st.write(message["content"])
+
+# Get the user's question using Streamlit's chat input
+question = st.chat_input("Ask anything")
+
+# Handle the user's question
+if question:
+    vectordb = st.session_state.get("vectordb", None)
+    if not vectordb:
+        with st.message("assistant"):
+            st.write("You need to provide a PDF")
+            st.stop()
+
+    # Search the vectordb for similar content to the user's question
+    search_results = vectordb.similarity_search(question, k=3)
+    # search_results
+    pdf_extract = "/n ".join([result.page_content for result in search_results])
+
+    # Update the prompt with the pdf extract
+    prompt[0] = {
+        "role": "system",
+        "content": prompt_template.format(pdf_extract=pdf_extract),
+    }
+
+    # Add the user's question to the prompt and display it
+    prompt.append({"role": "user", "content": question})
+    with st.chat_message("user"):
+        st.write(question)
+
+    # Display an empty assistant message while waiting for the response
+    with st.chat_message("assistant"):
+        botmsg = st.empty()
+
+    # Call ChatGPT with streaming and display the response as it comes
+    response = []
+    result = ""
+    for chunk in openai.ChatCompletion.create(
+        model="gpt-3.5-turbo", messages=prompt, stream=True
+    ):
+        text = chunk.choices[0].get("delta", {}).get("content")
+        if text is not None:
+            response.append(text)
+            result = "".join(response).strip()
+            botmsg.write(result)
+
+    # Add the assistant's response to the prompt
+    prompt.append({"role": "assistant", "content": result})
+
+    # Store the updated prompt in the session state
+    st.session_state["prompt"] = prompt
+    prompt.append({"role": "assistant", "content": result})
+
+    # Store the updated prompt in the session state
+    st.session_state["prompt"] = prompt