# Streamlit chat page backed by the OpenAI chat-completions API.
from openai import OpenAI, OpenAIError
import streamlit as st

with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
    # Bare string expressions below are rendered as markdown by Streamlit.
    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
    "[View the source code](https://github.com/streamlit/llm-examples/blob/main/Chatbot.py)"
    "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"

st.title("💬 Chatbot")
st.caption("🚀 A streamlit chatbot powered by OpenAI LLM")

# Seed the per-session history with a greeting the first time the page loads.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]

# The script reruns on every interaction, so replay the stored conversation.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    client = OpenAI(api_key=openai_api_key)
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    try:
        response = client.chat.completions.create(model="gpt-3.5-turbo", messages=st.session_state.messages)
    except OpenAIError as exc:
        # Remove the unanswered turn so a retry does not send it twice.
        st.session_state.messages.pop()
        st.error(f"OpenAI request failed: {exc}")
        st.stop()
    # The API may return no text content; store an empty string rather than None.
    msg = response.choices[0].message.content or ""
    st.session_state.messages.append({"role": "assistant", "content": msg})
    st.chat_message("assistant").write(msg)
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0a7eb69 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +# ⭐ Elmento AI + +Talk to your documents using AI. + +image + + +## Overview of the App + +This app is constantly developing to cover more models and file types. 
+ +At the moment, you can: + +- Upload PDF, JPG, JPEG and PNG files +- PDF File - get a summary or ask AI any questions about your PDF +- Image - get a description of what is on the image +- We keep a history of your files and chats for you to return to + +_Watch the demo on YouTube:_ + +[![Video Title](http://img.youtube.com/vi/p2US26T149Y/0.jpg)](http://www.youtube.com/watch?v=p2US26T149Y) + +## Demo App + +[![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://elmento-dev.streamlit.app/Profile) + +### Get an OpenAI API key + +You can get your own OpenAI API key by following these instructions: + +1. Go to https://platform.openai.com/account/api-keys. +2. Click on the `+ Create new secret key` button. +3. Next, enter an identifier name (optional) and click on the `Create secret key` button. + +### Contributing: + +I would love to get more people on board with this project! There are just a few small things you need to do :) +1. Fork the repository and clone it to your local machine. +2. Make your desired changes or additions. +3. Test your changes thoroughly. +4. Submit a pull request with a clear description of your changes and their purpose. 
import datetime
from unittest.mock import patch
from streamlit.testing.v1 import AppTest
from openai.types.chat import ChatCompletionMessage
from openai.types.chat.chat_completion import ChatCompletion, Choice


# See https://github.com/openai/openai-python/issues/715#issuecomment-1809203346
def create_chat_completion(response: str, role: str = "assistant") -> ChatCompletion:
    """Build a single-choice ``ChatCompletion`` whose message carries *response*.

    Used as the return value of the patched OpenAI ``create`` call so the
    app under test never reaches the network.
    """
    reply = ChatCompletionMessage(content=response, role=role)
    only_choice = Choice(finish_reason="stop", index=0, message=reply)
    created_at = int(datetime.datetime.now().timestamp())
    return ChatCompletion(
        id="foo",
        model="gpt-3.5-turbo",
        object="chat.completion",
        choices=[only_choice],
        created=created_at,
    )


@patch("openai.resources.chat.Completions.create")
def test_Chatbot(openai_create):
    """Chatbot page asks for a key first, then shows the mocked completion."""
    at = AppTest.from_file("Chatbot.py").run()
    assert not at.exception

    # Without an API key the page must only show the hint.
    at.chat_input[0].set_value("Do you know any jokes?").run()
    assert at.info[0].value == "Please add your OpenAI API key to continue."

    JOKE = "Why did the chicken cross the road? To get to the other side."
    openai_create.return_value = create_chat_completion(JOKE)

    # With a key, the user turn and the mocked assistant turn are rendered.
    at.text_input(key="chatbot_api_key").set_value("sk-...")
    at.chat_input[0].set_value("Do you know any jokes?").run()
    print(at)
    user_turn, assistant_turn = at.chat_message[1], at.chat_message[2]
    assert user_turn.markdown[0].value == "Do you know any jokes?"
    assert assistant_turn.markdown[0].value == JOKE
    assert assistant_turn.avatar == "assistant"
    assert not at.exception


# NOTE(review): this test loads "pages/3_Langchain_Quickstart.py"; confirm that
# page still exists in the repository, otherwise the test cannot pass.
@patch("langchain.llms.OpenAI.__call__")
def test_Langchain_Quickstart(langchain_llm):
    """Quickstart page asks for a key first, then shows the mocked LLM output."""
    at = AppTest.from_file("pages/3_Langchain_Quickstart.py").run()
    assert at.info[0].value == "Please add your OpenAI API key to continue."

    RESPONSE = "1. The best way to learn how to code is by practicing..."
    langchain_llm.return_value = RESPONSE

    key_box = at.sidebar.text_input[0]
    key_box.set_value("sk-...")
    submit = at.button[0]
    submit.set_value(True).run()
    print(at)
    assert at.info[0].value == RESPONSE
import streamlit as st
import firebase_admin

from firebase_admin import credentials
from firebase_admin import auth
from firebase_admin import firestore

# Service-account fields copied verbatim from st.secrets["firebase-auth"].
_SERVICE_ACCOUNT_FIELDS = (
    "type",
    "project_id",
    "private_key_id",
    "private_key",
    "client_email",
    "client_id",
    "auth_uri",
    "token_uri",
    "auth_provider_x509_cert_url",
    "client_x509_cert_url",
)


def initialize_firebase_app():
    """Initialise the default Firebase app once per process.

    ``firebase_admin.get_app()`` raises ``ValueError`` when no default app
    exists yet; only then are the credentials read from Streamlit secrets.
    """
    try:
        firebase_admin.get_app()
    except ValueError:
        secrets = st.secrets["firebase-auth"]
        cred = credentials.Certificate(
            {field: secrets[field] for field in _SERVICE_ACCOUNT_FIELDS}
        )

        # The bucket is given by name only (no "gs://" scheme), matching the
        # name passed to storage.bucket() on the Documents page.
        firebase_admin.initialize_app(
            cred,
            {
                'storageBucket': 'elmeto-12de0.appspot.com'
            }
        )


# Call the function to initialize the app
initialize_firebase_app()

st.title('Welcome to :blue[Elmento]')

if 'username' not in st.session_state:
    st.session_state.username = ''

if 'useremail' not in st.session_state:
    st.session_state.useremail = ''


def f():
    """Login callback: look the user up by e-mail and mark the session logged in.

    Reads the module-level ``email`` set by the login form.

    NOTE(review): the entered password is never checked here; the lookup
    succeeds for any existing e-mail address. Verifying credentials needs a
    sign-in flow outside the calls visible in this file.
    """
    try:
        user = auth.get_user_by_email(email)
    except (ValueError, firebase_admin.exceptions.FirebaseError):
        # ValueError: malformed/empty e-mail; FirebaseError: unknown user or
        # backend failure.
        st.warning('Login Failed')
        return

    st.success('Login Successfully!')

    st.session_state.username = user.uid
    st.session_state.useremail = user.email

    st.session_state.signout = True
    st.session_state.signedout = True
    st.session_state['logged_in'] = True
# sign out function
def t():
    """Reset every session flag that the login callback sets."""
    st.session_state.signout = False
    st.session_state.signedout = False
    st.session_state.username = ''
    st.session_state.useremail = ''
    # The other pages gate on 'logged_in', so it must be cleared as well.
    st.session_state['logged_in'] = False


if 'signedout' not in st.session_state:
    st.session_state.signedout = False
if 'signout' not in st.session_state:
    st.session_state.signout = False

if not st.session_state['signedout']:
    db = firestore.client()
    st.session_state.db = db

    choice = st.selectbox('Login/Signup', ['Login', 'Sign Up'])

    if choice == 'Login':
        email = st.text_input('Email Address')
        password = st.text_input('Password', type='password')
        st.button('Login', on_click=f)

    else:
        email = st.text_input('Email Address')
        password = st.text_input('Password', type='password')

        # NOTE(review): the username is collected but not stored anywhere.
        username = st.text_input('Enter your unique username')

        if st.button('Create my account'):
            try:
                user = auth.create_user(email=email, password=password)
            except (ValueError, firebase_admin.exceptions.FirebaseError) as exc:
                # Invalid e-mail, weak password, duplicate account, backend error.
                st.error(f'Could not create the account: {exc}')
            else:
                doc_ref = db.collection('users').document(user.uid)
                doc_ref.set({
                    'uid': user.uid,
                    'email': email,
                })

                st.success('Account created successfully!')
                st.markdown('Please login using your email and password')
                st.balloons()
# search for tesseract binary in path
@st.cache_resource
def find_tesseract_binary() -> "str | None":
    """Return the path of the ``tesseract`` executable on PATH, or None."""
    return shutil.which("tesseract")

# INITIALISE VARIABLES #################################################################################################

# pytesseract
pytesseract.pytesseract.tesseract_cmd = find_tesseract_binary()
if not pytesseract.pytesseract.tesseract_cmd:
    st.error("Tesseract binary not found in PATH. Please install Tesseract.")

# firestore database
db = firestore.client()
bucket = storage.bucket('elmeto-12de0.appspot.com')

# logged in parameter
if 'logged_in' not in st.session_state:
    st.session_state.logged_in = False


# INITIALISE FUNCTIONS #################################################################################################

def encode_image(image_path):
    """Return the file at *image_path* as a base64-encoded ASCII string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def save_uploaded_file(uploaded_file, target_path):
    """Write the buffer of *uploaded_file* to *target_path*."""
    with open(target_path, "wb") as f:
        f.write(uploaded_file.getbuffer())


def send_image_to_openai(image_bytes, api_key, key, mime_type="image/jpeg"):
    """Render a "Get Explanation" button and, when clicked, describe the image.

    Args:
        image_bytes: Raw image data sent inline as a base64 data URL.
        api_key: OpenAI API key used for the bearer token.
        key: Streamlit widget key for the button.
        mime_type: MIME type announced in the data URL (defaults to the
            previously hard-coded ``image/jpeg``).
    """
    # Nothing to do until the button is clicked; skip encoding on plain reruns.
    if not st.button("Get Explanation", key = key):
        return

    base64_image = base64.b64encode(image_bytes).decode('utf-8')
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What’s in this image? Explain the image content"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 100
    }
    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,  # never hang the page on a stalled connection
        )
        response.raise_for_status()
        explanation = response.json()['choices'][0]['message']['content']
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        st.error(f"Error: {e}")
    else:
        st.success("Explanation: {}".format(explanation))
def send_text_to_openai(text_content):
    """Ask the chat-completions API to summarise *text_content* and show it.

    Uses the module-level ``api_key``. A list/tuple of strings is joined with
    newlines so the prompt contains text rather than a Python list repr.
    """
    if isinstance(text_content, (list, tuple)):
        text_content = "\n".join(str(part) for part in text_content)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-3.5-turbo-0125",
        "messages": [
            {
                "role": "user",
                "content": f"Summarise this text for me: ... {text_content}"
            }
        ],
        "max_tokens": 100
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        explanation = response.json()['choices'][0]['message']['content']
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        st.error(f"Error: {e}")
    else:
        st.success(f"Explanation: {explanation}")


def chat_to_ai(file_name):
    """Placeholder for chatting about a specific PDF; only writes a status line."""
    st.write(f"Chatting about {file_name}...")


def get_summary(pdf_bytes, file_name):
    """OCR every page of the PDF and send the combined text for summarising."""
    st.write(f"Getting summary for {file_name}...")
    pdf_texts = []  # OCR text, one entry per page

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        for page in doc:
            pix = page.get_pixmap()
            page_image = Image.open(io.BytesIO(pix.tobytes()))
            pdf_texts.append(pytesseract.image_to_string(page_image))
    finally:
        doc.close()

    st.write(pdf_texts)
    send_text_to_openai("\n".join(pdf_texts))


def nav_page(page_name, timeout_secs=3):
    """Navigate to another page of the app by clicking its sidebar link.

    NOTE(review): the script template was empty in the reviewed source, which
    made the ``%`` formatting raise TypeError. The script below is a
    reconstruction of the usual link-clicking approach; confirm it against
    the intended original.
    """
    import json  # local import: only needed to quote the page name for JS

    nav_script = """
        <script>
            function attempt_nav_page(page_name, start_time, timeout_secs) {
                var links = window.parent.document.getElementsByTagName("a");
                for (var i = 0; i < links.length; i++) {
                    if (links[i].href.toLowerCase().endsWith("/" + page_name.toLowerCase())) {
                        links[i].click();
                        return;
                    }
                }
                var elapsed = new Date() - start_time;
                if (elapsed < timeout_secs * 1000) {
                    setTimeout(attempt_nav_page, 100, page_name, start_time, timeout_secs);
                } else {
                    alert("Unable to navigate to page '" + page_name + "' after " + timeout_secs + " second(s).");
                }
            }
            window.addEventListener("load", function() {
                attempt_nav_page(%s, new Date(), %d);
            });
        </script>
    """ % (json.dumps(page_name), timeout_secs)
    html(nav_script)


def get_existing_files():
    """Return every document record of the current user as a list of dicts."""
    docs_ref = db.collection('users').document(username).collection('documents')
    return [snapshot.to_dict() for snapshot in docs_ref.get()]
def get_existing_file_names():
    """Return the ``filename`` of every document stored for the current user."""
    docs_ref = db.collection('users').document(username).collection('documents')
    return [snapshot.to_dict()['filename'] for snapshot in docs_ref.get()]


def get_last_file():
    """Return the most recently uploaded document record, or None if there is none."""
    docs_ref = db.collection('users').document(username).collection('documents')
    query = docs_ref.order_by('uploaded_at', direction=firestore.Query.DESCENDING).limit(1)
    newest = next(iter(query.stream()), None)
    return newest.to_dict() if newest is not None else None


def check_file(file):
    """Fetch the file's URL and render a link on success or the failure reason."""
    response_url = file['url']
    response_filename = file['filename']
    try:
        response = requests.get(response_url, timeout=30)
    except requests.RequestException as exc:
        st.write(f"Failed: {exc} file name: {response_filename}")
        return
    if response.status_code == 200:
        st.markdown(f"[{response_filename}]({response_url})")
    else:
        st.write(f"Failed: {response.status_code} file name: {response_filename}")


def create_thumbnail(image_stream, format):
    """Return a BytesIO holding a thumbnail (max 128x128) saved in *format*."""
    size = (128, 128)
    image = Image.open(image_stream)
    image.thumbnail(size)  # in-place resize

    thumb_io = io.BytesIO()
    image.save(thumb_io, format, quality=95)
    thumb_io.seek(0)
    return thumb_io


# NOTE: a later definition of the same name in this module replaces this one.
def display_file_with_thumbnail(file):
    """Render the file as a clickable thumbnail, or as a plain link without one."""
    if file.get('thumbnail_url'):
        link = f"[![Thumbnail]({file['thumbnail_url']})]({file['url']})"
        st.markdown(link, unsafe_allow_html=True)
    else:
        st.markdown(f"[{file['filename']}]({file['url']})")


def parse_text():
    """Placeholder; only writes a status line."""
    st.write('parsing...')


def pdf_page_to_image(pdf_stream):
    """Render the first page of a PDF to PNG and return it as a BytesIO."""
    doc = fitz.open("pdf", pdf_stream)
    try:
        page = doc.load_page(0)
        pix = page.get_pixmap(matrix=fitz.Matrix(72 / 72, 72 / 72))
        # A BytesIO built from bytes starts at position 0.
        return io.BytesIO(pix.tobytes("png"))
    finally:
        # Close the document even when rendering fails.
        doc.close()


def pdf_parse_content(pdf_bytes, file_info=None):
    """OCR a PDF, store the result as a chat, and navigate to the chat page.

    Args:
        pdf_bytes: Raw PDF data.
        file_info: Document record (needs ``filename`` and ``doc_id``). When
            omitted, the module-level ``file`` variable is used, which is what
            existing callers rely on.
    """
    if file_info is None:
        file_info = file  # legacy behaviour: loop variable of the page script

    pdf_images = []
    pdf_texts = []  # OCR text, one entry per page

    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        for page in doc:
            pix = page.get_pixmap()
            pdf_image = Image.open(io.BytesIO(pix.tobytes()))
            pdf_images.append(pdf_image)
            pdf_texts.append(pytesseract.image_to_string(pdf_image))
    finally:
        doc.close()

    st.session_state['username'] = username
    st.session_state['pdf_images'] = pdf_images
    st.session_state['pdf_texts'] = pdf_texts
    st.session_state['file_name'] = file_info['filename']
    st.session_state['chat_file_name'] = file_info['filename']
    st.session_state['doc_id'] = file_info['doc_id']

    # The chat document shares its id with the file document.
    chat_id = file_info['doc_id']

    # adding chat to db
    doc_ref = db.collection('users').document(username).collection('chats').document(chat_id)
    doc_ref.set({
        'filename': file_info['filename'],
        'pdf_text': pdf_texts,
        'chat_id' : chat_id,
        'file_id' : file_info['doc_id']
    })

    nav_page("chat_to_ai")
def upload_file(uploaded_file, thumbnail_stream):
    """Upload a file (and optional thumbnail) and record it in Firestore.

    Args:
        uploaded_file: Streamlit upload exposing ``name``, ``type`` and ``getvalue()``.
        thumbnail_stream: BytesIO with thumbnail data, or None.

    Returns:
        The stored document record as a dict.
    """
    blob = bucket.blob(f"{username}/{uuid.uuid4()}_{uploaded_file.name}")
    blob.upload_from_string(uploaded_file.getvalue(), content_type=uploaded_file.type)

    link_lifetime = datetime.timedelta(minutes=10000)
    # NOTE(review): the signed URLs stored below stop working once this
    # lifetime elapses; confirm whether links should be regenerated on read.

    # Prepare the thumbnail
    thumb_url = None
    thumb_name = None
    if thumbnail_stream:
        thumb_blob = bucket.blob(f"{username}/{uuid.uuid4()}_thumb_{uploaded_file.name}")
        # Image thumbnails keep the source format; anything else is rendered as PNG.
        if uploaded_file.type.startswith('image/'):
            thumb_type = uploaded_file.type
        else:
            thumb_type = 'image/png'
        thumb_blob.upload_from_string(thumbnail_stream.getvalue(), content_type=thumb_type)
        thumb_url = thumb_blob.generate_signed_url(version="v4", expiration=link_lifetime, method='GET')
        thumb_name = thumb_blob.name

    url = blob.generate_signed_url(version="v4", expiration=link_lifetime, method='GET')

    doc_ref = db.collection('users').document(username).collection('documents').document()
    doc_ref.set({
        'filename': uploaded_file.name,
        'content_type': uploaded_file.type,
        'url': url,
        'blob': str(blob),
        # Exact object names, so readers need not parse the repr stored in 'blob'.
        'blob_name': blob.name,
        'thumbnail_blob_name': thumb_name,
        'thumbnail_url': thumb_url,
        'uploaded_at': firestore.SERVER_TIMESTAMP,
        'doc_id': doc_ref.id
    })

    return doc_ref.get().to_dict()


def delete_file(username, file_id):
    """Delete a document record, its chat record, and its stored objects.

    Storage objects are removed only for records that carry ``blob_name`` /
    ``thumbnail_blob_name``; older records without them lose only their
    Firestore entries, as before.
    """
    st.write("Trying to delete...")
    try:
        user_ref = db.collection('users').document(username)
        # Document reference
        doc_ref = user_ref.collection('documents').document(file_id)
        # The chat document is stored under the same id as the file document.
        chats_ref = user_ref.collection('chats').document(file_id)

        stored = doc_ref.get().to_dict() or {}
        chats_ref.delete()
        doc_ref.delete()
        for field in ('blob_name', 'thumbnail_blob_name'):
            if stored.get(field):
                bucket.blob(stored[field]).delete()
        st.write('Deleted successfully')
    except Exception as e:
        st.write(f"An error occurred while trying to delete the file: {e}")
def display_file_with_thumbnail(file):
    """Show the file's thumbnail image, or a plain link when it has none."""
    if file.get('thumbnail_url'):
        st.image(file['thumbnail_url'], caption=file['filename'], width=300)
    else:
        st.markdown(f"[{file['filename']}]({file['url']})")


def upload_single_file(uploaded_file):
    """Build a thumbnail for *uploaded_file*, upload both, and return the record."""
    thumbnail_stream = None
    if uploaded_file.type.startswith('image/'):
        thumbnail_stream = create_thumbnail(uploaded_file, uploaded_file.type.split('/')[-1])
    elif uploaded_file.type.startswith('application/pdf'):
        thumbnail_stream = pdf_page_to_image(uploaded_file.getvalue())

    try:
        # upload_file already returns the stored record; no second query needed.
        return upload_file(uploaded_file, thumbnail_stream)
    finally:
        if thumbnail_stream is not None:
            thumbnail_stream.close()


def _blob_name_from_repr(blob_repr):
    """Extract the object name from a ``<Blob: bucket, name, generation>`` string.

    The name is everything between the first and the last comma, so object
    names that themselves contain commas survive intact.
    """
    inner = blob_repr.strip()
    if inner.startswith('<Blob:') and inner.endswith('>'):
        inner = inner[len('<Blob:'):-1]
    _bucket_name, _, remainder = inner.partition(',')
    name, sep, _generation = remainder.rpartition(',')
    if not sep:
        # Only two fields present: the remainder is the name itself.
        name = remainder
    return name.strip()


def get_img_blob(file):
    """Download and return the stored bytes for a document record."""
    blob_path = file.get('blob_name') or _blob_name_from_repr(file['blob'])
    blob = bucket.blob(blob_path)
    return blob.download_as_bytes()


st.title("Documents")

if st.session_state.logged_in:
    api_key = st.text_input("OpenAI API Key", key="file_qa_api_key", type="password")
    username = st.session_state.username

    files = get_existing_files()
    existing_file_names = [file['filename'] for file in files]  # List of existing file names

    with st.form("my-form", clear_on_submit=True):
        uploaded_file = st.file_uploader("FILE UPLOADER")
        submitted = st.form_submit_button("UPLOAD!")

    if uploaded_file and uploaded_file.name not in existing_file_names:
        file = upload_single_file(uploaded_file)
        uploaded_file = None  # Clear the uploaded file after handling
        st.rerun()
    elif uploaded_file:
        st.warning(f"A file named {uploaded_file.name} already exists.")

    if files:
        st.write('All files are:')
        # The loop variable must stay named `file`: pdf_parse_content reads it
        # as a module-level name.
        for file in files:
            display_file_with_thumbnail(file)
            if st.button("Delete", key=f"delete_{file['url']}"):
                delete_file(username, file['doc_id'])  # Function to delete the file
            file_extension = file['filename'].split(".")[-1].lower()
            if file_extension in ["jpg", "jpeg", "png"]:
                image_bytes = get_img_blob(file)
                send_image_to_openai(image_bytes, api_key, key=f"chat_{file['url']}")
            elif file_extension == "pdf":
                # Download the PDF only when an action is requested, not on
                # every rerun of the page.
                if st.button("Chat to AI", key=f"chat_{file['url']}"):
                    pdf_parse_content(get_img_blob(file))
                if st.button("Get Summary", key=f"chat_summary_{file['url']}"):
                    get_summary(get_img_blob(file), file['filename'])
else:
    st.write('Register please.')
import streamlit as st
from firebase_admin import firestore, storage
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
import datetime

db = firestore.client()


# FUNCTIONS
def response_func(prompt, text):
    """Answer *prompt* from *text* using similarity search plus a QA chain.

    Args:
        prompt: The user's question.
        text: Document text, either one string or a list of per-page strings.

    Returns:
        The chain's answer, or a short notice when the document has no text.

    Uses the module-level ``api_key``.
    """
    # Join page lists with real newlines; str() on a list would yield a repr
    # with escaped "\\n" sequences that the newline splitter cannot break up.
    if isinstance(text, (list, tuple)):
        text = "\n".join(str(page) for page in text)
    else:
        text = str(text)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # An index cannot be built from zero texts.
        return "No text could be extracted from this document."

    embeddings = OpenAIEmbeddings(openai_api_key = api_key)
    knowledge_base = FAISS.from_texts(chunks, embeddings)
    docs = knowledge_base.similarity_search(prompt)
    llm = OpenAI(openai_api_key = api_key)
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback():
        result = chain.run(input_documents=docs, question=prompt)
    return result
def display_messages(chat_id, username):
    """Render the stored messages of one chat, oldest first."""
    # Fetch messages from Firestore
    messages = db.collection('users').document(username).collection('chats').document(chat_id).collection(
        'messages').order_by("timestamp").stream()

    # Display messages using Streamlit's chat message format
    for message in messages:
        record = message.to_dict() or {}

        if record.get('message_user'):
            with st.chat_message("user"):
                st.markdown(record['message_user'])

        if record.get('message_ai'):
            with st.chat_message("assistant"):
                st.markdown(record['message_ai'])


if 'logged_in' not in st.session_state:
    st.session_state.logged_in = False

st.title("Chat To AI")

# MAIN SCRIPT
if 'logged_in' in st.session_state and st.session_state.logged_in:
    api_key = st.text_input("OpenAI API Key", key="file_docs_api_key", type="password")
    if 'username' in st.session_state:
        username = st.session_state['username']

        chats_ref = db.collection('users').document(username).collection('chats')
        chats = chats_ref.get()
        chats_all = [chat.to_dict() for chat in chats]
        chat_names = [chat['filename'] for chat in chats_all if 'filename' in chat]
        selected_chat_name = st.sidebar.radio("Select a Chat:", chat_names)
        # .get() so chats stored without a 'filename' cannot raise KeyError here.
        selected_chat_data = next(
            (chat for chat in chats_all if chat.get('filename') == selected_chat_name),
            None,
        )

        if selected_chat_data:
            st.write(f"Starting chat session FOR: {selected_chat_data['filename']}")
            st.write(f"The id in the selected file is: {selected_chat_data['chat_id']}")
            display_messages(selected_chat_data['chat_id'], username)
            if prompt := st.chat_input("What is up?"):
                if not api_key:
                    st.info("Please add your OpenAI API key to continue.")
                    st.stop()

                chat_id = selected_chat_data['chat_id']
                with st.chat_message("user"):
                    st.markdown(prompt)
                response = response_func(prompt, selected_chat_data['pdf_text'])
                with st.chat_message("assistant"):
                    st.markdown(response)
                # Persist the exchange so display_messages can replay it.
                doc_ref = db.collection('users').document(username).collection('chats').document(chat_id).collection(
                    'messages').document()
                doc_ref.set({
                    'message_user': prompt,
                    'message_ai' : response,
                    'timestamp': datetime.datetime.now(datetime.timezone.utc).isoformat()
                })

else:
    st.write('Please register or login to continue.')
st.markdown(response) + doc_ref = db.collection('users').document(username).collection('chats').document(chat_id).collection( + 'messages').document() + doc_ref.set({ + 'message_user': prompt, + 'message_ai' : response, + 'timestamp': datetime.datetime.now(datetime.timezone.utc).isoformat() + }) + +else: + st.write('Please register or login to continue.') + diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..8635be6 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,5 @@ +black==23.3.0 +mypy==1.4.1 +pre-commit==3.3.3 +watchdog +pytest diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d55880c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +streamlit>=1.28 +langchain>=0.0.217 +openai>=1.2 +duckduckgo-search +anthropic>=0.3.0 +trubrics>=1.4.3 +pytesseract==0.3.8 +pyMuPDF==1.24.2 +pytest-shutil +tiktoken +faiss-cpu +google-cloud-firestore +firebase-admin +pdf2image==1.17.0 \ No newline at end of file diff --git a/test/test_firebaseStorage.py b/test/test_firebaseStorage.py new file mode 100644 index 0000000..e69de29 diff --git a/test/test_firebasedata.py b/test/test_firebasedata.py new file mode 100644 index 0000000..ea3f7c4 --- /dev/null +++ b/test/test_firebasedata.py @@ -0,0 +1,15 @@ +import streamlit as st +from google.cloud import firestore + +# Authenticate to Firestore with the JSON account key. +db = firestore.Client.from_service_account_json(r"C:\Users\sasha\PycharmProjects\elmento\elmento-secret.json") + +# Create a reference to the Google post. +doc_ref = db.collection("posts").document("Google") + +# Then get the data at that reference. +doc = doc_ref.get() + +# Let's see what we got! +st.write("The id is: ", doc.id) +st.write("The contents are: ", doc.to_dict()) \ No newline at end of file