diff --git a/Chatbot.py b/Chatbot.py
new file mode 100644
index 0000000..b6fe7ee
--- /dev/null
+++ b/Chatbot.py
@@ -0,0 +1,29 @@
+from openai import OpenAI
+import streamlit as st
+
+# Sidebar: API-key input plus helper links.
+with st.sidebar:
+    # type="password" masks the key while it is typed.
+    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
+    # Bare string expressions: rendered by Streamlit's magic-commands feature.
+    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
+    "[View the source code](https://github.com/streamlit/llm-examples/blob/main/Chatbot.py)"
+    "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"
+
+st.title("💬 Chatbot")
+st.caption("🚀 A streamlit chatbot powered by OpenAI LLM")
+# Seed the conversation with an assistant greeting the first time the session runs.
+if "messages" not in st.session_state:
+    st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
+
+# Replay the stored conversation so far.
+for msg in st.session_state.messages:
+    st.chat_message(msg["role"]).write(msg["content"])
+
+if prompt := st.chat_input():
+    # Without a key nothing is sent and nothing is appended to the history.
+    if not openai_api_key:
+        st.info("Please add your OpenAI API key to continue.")
+        st.stop()
+
+    client = OpenAI(api_key=openai_api_key)
+    st.session_state.messages.append({"role": "user", "content": prompt})
+    st.chat_message("user").write(prompt)
+    # The whole stored history (including the new prompt) is sent as context.
+    response = client.chat.completions.create(model="gpt-3.5-turbo", messages=st.session_state.messages)
+    msg = response.choices[0].message.content
+    st.session_state.messages.append({"role": "assistant", "content": msg})
+    st.chat_message("assistant").write(msg)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0a7eb69
--- /dev/null
+++ b/README.md
@@ -0,0 +1,43 @@
+# ⭐ Elmento AI
+
+Talk to your documents using AI.
+
+
+
+
+## Overview of the App
+
+This app is under active development and will grow to cover more models and file types.
+
+At the moment, you can:
+
+- Upload PDF, JPG, JPEG and PNG files
+- For a PDF file: get a summary or ask the AI any questions about your PDF
+- For an image: get a description of what is in the image
+- Return to your files and chats later (we keep a history of them for you)
+
+_Watch the demo on YouTube:_
+
+[![Video Title](http://img.youtube.com/vi/p2US26T149Y/0.jpg)](http://www.youtube.com/watch?v=p2US26T149Y)
+
+## Demo App
+
+[![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://elmento-dev.streamlit.app/Profile)
+
+### Get an OpenAI API key
+
+You can get your own OpenAI API key by following these instructions:
+
+1. Go to https://platform.openai.com/account/api-keys.
+2. Click on the `+ Create new secret key` button.
+3. Next, enter an identifier name (optional) and click on the `Create secret key` button.
+
+### Contributing:
+
+I would love to get more people on board with this project! There are just a few small things you need to do :)
+1. Fork the repository and clone it to your local machine.
+2. Make your desired changes or additions.
+3. Test your changes thoroughly.
+4. Submit a pull request with a clear description of your changes and their purpose.
+
+
diff --git a/app_test.py b/app_test.py
new file mode 100644
index 0000000..829f048
--- /dev/null
+++ b/app_test.py
@@ -0,0 +1,56 @@
+import datetime
+from unittest.mock import patch
+from streamlit.testing.v1 import AppTest
+from openai.types.chat import ChatCompletionMessage
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+
+
+# See https://github.com/openai/openai-python/issues/715#issuecomment-1809203346
+def create_chat_completion(response: str, role: str = "assistant") -> ChatCompletion:
+    """Build a single-choice ``ChatCompletion`` whose message carries *response*.
+
+    Used as the return value of the patched OpenAI ``create`` call in the tests.
+    """
+    reply = ChatCompletionMessage(content=response, role=role)
+    only_choice = Choice(finish_reason="stop", index=0, message=reply)
+    return ChatCompletion(
+        id="foo",
+        model="gpt-3.5-turbo",
+        object="chat.completion",
+        choices=[only_choice],
+        created=int(datetime.datetime.now().timestamp()),
+    )
+
+
+@patch("openai.resources.chat.Completions.create")
+def test_Chatbot(openai_create):
+    """Drive Chatbot.py twice: without an API key, then with a mocked completion."""
+    at = AppTest.from_file("Chatbot.py").run()
+    assert not at.exception
+    # No key entered yet: the prompt must only produce the info message.
+    at.chat_input[0].set_value("Do you know any jokes?").run()
+    assert at.info[0].value == "Please add your OpenAI API key to continue."
+
+    JOKE = "Why did the chicken cross the road? To get to the other side."
+    openai_create.return_value = create_chat_completion(JOKE)
+    at.text_input(key="chatbot_api_key").set_value("sk-...")
+    at.chat_input[0].set_value("Do you know any jokes?").run()
+    print(at)
+    # chat_message[0] is the greeting Chatbot.py seeds; [1] and [2] are this exchange.
+    assert at.chat_message[1].markdown[0].value == "Do you know any jokes?"
+    assert at.chat_message[2].markdown[0].value == JOKE
+    assert at.chat_message[2].avatar == "assistant"
+    assert not at.exception
+
+
+@patch("langchain.llms.OpenAI.__call__")
+def test_Langchain_Quickstart(langchain_llm):
+    """Run the Langchain quickstart page with the LLM call mocked.
+
+    NOTE(review): this change set adds pages/1_Profile.py, pages/2_Documents.py
+    and pages/3_Chat_To_AI.py but no pages/3_Langchain_Quickstart.py -- confirm
+    the page exists, otherwise this test cannot load its script.
+    """
+    at = AppTest.from_file("pages/3_Langchain_Quickstart.py").run()
+    assert at.info[0].value == "Please add your OpenAI API key to continue."
+
+    RESPONSE = "1. The best way to learn how to code is by practicing..."
+    langchain_llm.return_value = RESPONSE
+    # Enter a key in the sidebar, press the first button, expect the mocked reply.
+    at.sidebar.text_input[0].set_value("sk-...")
+    at.button[0].set_value(True).run()
+    print(at)
+    assert at.info[0].value == RESPONSE
diff --git a/packages.txt b/packages.txt
new file mode 100644
index 0000000..c6e56a6
--- /dev/null
+++ b/packages.txt
@@ -0,0 +1,2 @@
+tesseract-ocr
+tesseract-ocr-eng
\ No newline at end of file
diff --git a/pages/1_Profile.py b/pages/1_Profile.py
new file mode 100644
index 0000000..4a22141
--- /dev/null
+++ b/pages/1_Profile.py
@@ -0,0 +1,103 @@
+import streamlit as st
+import firebase_admin
+
+from firebase_admin import credentials
+from firebase_admin import auth
+from firebase_admin import firestore
+
+def initialize_firebase_app():
+    """Initialise the default Firebase app unless one already exists.
+
+    ``firebase_admin.get_app()`` raises ``ValueError`` when there is no default
+    app; only then is a service-account credential built from the
+    ``firebase-auth`` section of ``st.secrets`` and the app initialised.
+    """
+    try:
+        firebase_admin.get_app()
+    except ValueError:
+        secrets = st.secrets["firebase-auth"]
+
+        # Copy exactly the service-account fields out of the secrets section.
+        credential_fields = (
+            "type",
+            "project_id",
+            "private_key_id",
+            "private_key",
+            "client_email",
+            "client_id",
+            "auth_uri",
+            "token_uri",
+            "auth_provider_x509_cert_url",
+            "client_x509_cert_url",
+        )
+        cred = credentials.Certificate({field: secrets[field] for field in credential_fields})
+
+        # Bare bucket name, no "gs://" scheme: this is the same name that
+        # pages/2_Documents.py passes to storage.bucket().
+        firebase_admin.initialize_app(cred, {'storageBucket': 'elmeto-12de0.appspot.com'})
+
+# Make sure the default Firebase app exists before anything else runs
+initialize_firebase_app()
+
+st.title('Welcome to :blue[Elmento]')
+
+# Seed the identity fields with empty strings on the first run of a session
+for _state_key in ('username', 'useremail'):
+    if _state_key not in st.session_state:
+        st.session_state[_state_key] = ''
+
+def f():
+    """Login callback wired to the 'Login' button.
+
+    Looks up the account for the module-level ``email`` input.  On success the
+    user's uid and e-mail are stored in ``st.session_state`` and the session is
+    flagged as logged in; on any lookup failure 'Login Failed' is shown.
+
+    NOTE(review): the entered password is never checked -- the lookup below
+    only proves that the e-mail is registered.  Confirm how password
+    verification is meant to happen before relying on this for access control.
+    """
+    try:
+        user = auth.get_user_by_email(email)
+    except Exception:
+        # `Exception` rather than a bare `except`, so BaseException-derived
+        # control flow (KeyboardInterrupt, SystemExit, ...) still propagates.
+        st.warning('Login Failed')
+        return
+
+    st.success('Login Successfully!')
+
+    st.session_state.username = user.uid
+    st.session_state.useremail = user.email
+
+    st.session_state.signout = True
+    st.session_state.signedout = True
+    st.session_state['logged_in'] = True
+
+# sign out function
+def t():
+    """Sign the current user out by resetting every flag the login callback sets."""
+    st.session_state.signout = False
+    st.session_state.signedout = False
+    st.session_state.username = ''
+    # Also clear the remaining login state: the other pages gate their content
+    # on `logged_in`, so leaving it True would keep a signed-out user inside.
+    st.session_state.useremail = ''
+    st.session_state['logged_in'] = False
+
+if 'signedout' not in st.session_state:
+    st.session_state.signedout = False
+if 'signout' not in st.session_state:
+    st.session_state.signout = False
+
+# The login / sign-up form is only rendered while nobody is signed in.
+if not st.session_state['signedout']:
+    db = firestore.client()
+    st.session_state.db = db
+
+    choice = st.selectbox('Login/Signup', ['Login', 'Sign Up'])
+
+    # Both modes use the same two inputs; `email` is read by the login callback f().
+    email = st.text_input('Email Address')
+    password = st.text_input('Password', type='password')
+
+    if choice == 'Login':
+        st.button('Login', on_click=f)
+
+    else:
+        # NOTE(review): the username is collected but not stored anywhere below.
+        username = st.text_input('Enter your unique username')
+
+        if st.button('Create my account'):
+            try:
+                user = auth.create_user(email=email, password=password)
+            except (ValueError, firebase_admin.exceptions.FirebaseError) as err:
+                # e.g. malformed e-mail, rejected password, or e-mail already registered
+                st.error(f'Could not create the account: {err}')
+            else:
+                doc_ref = db.collection('users').document(user.uid)
+                doc_ref.set({
+                    'uid': user.uid,
+                    'email': email,
+                })
+
+                st.success('Account created successfully!')
+                st.markdown('Please login using your email and password')
+                st.balloons()
+
+
diff --git a/pages/2_Documents.py b/pages/2_Documents.py
new file mode 100644
index 0000000..569e66d
--- /dev/null
+++ b/pages/2_Documents.py
@@ -0,0 +1,378 @@
+import base64
+import streamlit as st
+import requests
+from PIL import Image
+import io
+import pytesseract
+import shutil
+from streamlit.components.v1 import html
+from firebase_admin import firestore, storage
+import uuid
+import datetime
+import fitz
+import contextlib
+import base64
+
+# The tesseract command is resolved from PATH (see the assignment below the
+# function), so no machine-specific path has to be edited per deployment.
+
+# search for tesseract binary in path
+@st.cache_resource
+def find_tesseract_binary() -> "str | None":
+    """Return the path of the ``tesseract`` executable found on PATH.
+
+    Returns ``None`` when no such executable is found (``shutil.which``).
+    """
+    return shutil.which("tesseract")
+
+# INITIALISE VARIABLES #################################################################################################
+
+# pytesseract: point it at whatever tesseract binary is on PATH
+pytesseract.pytesseract.tesseract_cmd = find_tesseract_binary()
+if not pytesseract.pytesseract.tesseract_cmd:
+    # Only reports the problem; the script keeps running after this line.
+    st.error("Tesseract binary not found in PATH. Please install Tesseract.")
+
+# firestore database
+# NOTE(review): this file never calls firebase_admin.initialize_app(); it
+# presumably relies on pages/1_Profile.py having run first -- confirm.
+db = firestore.client()
+bucket = storage.bucket('elmeto-12de0.appspot.com')
+
+# logged in parameter: default to "not logged in" on first visit
+if 'logged_in' not in st.session_state:
+    st.session_state.logged_in = False
+
+
+# INITIALISE FUNCTIONS #################################################################################################
+
+def encode_image(image_path):
+    """Read the file at *image_path* and return its bytes as a base64 text string."""
+    with open(image_path, "rb") as handle:
+        raw = handle.read()
+    return base64.b64encode(raw).decode('utf-8')
+
+def save_uploaded_file(uploaded_file, target_path):
+    """Write the in-memory contents of *uploaded_file* to *target_path*.
+
+    *uploaded_file* must provide ``getbuffer()``; the target is opened in
+    binary write mode, so an existing file is overwritten.
+    """
+    with open(target_path, "wb") as sink:
+        payload = uploaded_file.getbuffer()
+        sink.write(payload)
+
+def send_image_to_openai(image_bytes, api_key, key):
+    """Render a "Get Explanation" button that asks OpenAI to describe an image.
+
+    Args:
+        image_bytes: Raw bytes of the image to describe.
+        api_key: OpenAI API key, sent as the bearer token.
+        key: Streamlit widget key for the button.
+
+    Nothing is encoded or sent until the button is clicked.  The explanation,
+    or an error message, is written to the page; nothing is returned.
+    """
+    if not st.button("Get Explanation", key = key):
+        return
+
+    base64_image = base64.b64encode(image_bytes).decode('utf-8')
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+
+    payload = {
+        "model": "gpt-4-vision-preview",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "What’s in this image? Explain the image content"
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            # NOTE(review): always labelled image/jpeg although the
+                            # page also passes PNG bytes -- confirm this is accepted.
+                            "url": f"data:image/jpeg;base64,{base64_image}"
+                        }
+                    }
+                ]
+            }
+        ],
+        "max_tokens": 100
+    }
+
+    try:
+        # The timeout keeps a stalled request from hanging the page forever.
+        response = requests.post("https://api.openai.com/v1/chat/completions",
+                                 headers=headers, json=payload, timeout=60)
+        body = response.json()
+        if not response.ok:
+            # Show the API's own message instead of failing later on 'choices'.
+            detail = f"HTTP {response.status_code}"
+            if isinstance(body, dict) and isinstance(body.get("error"), dict):
+                detail = body["error"].get("message") or detail
+            st.error(f"Error: {detail}")
+            return
+        explanation = body['choices'][0]['message']['content']
+    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
+        st.error(f"Error: {e}")
+        return
+
+    st.success("Explanation: {}".format(explanation))
+
+def send_text_to_openai(text_content):
+    """Ask OpenAI to summarise *text_content* and show the answer on the page.
+
+    Args:
+        text_content: Either one string or an iterable of strings (for example
+            one entry per PDF page); an iterable is joined with newlines.
+
+    Uses the module-level ``api_key``.  The summary, or an error message, is
+    written to the page; nothing is returned.
+    """
+    if not isinstance(text_content, str):
+        # Interpolating a list directly would send its repr (brackets, quotes
+        # and escaped newlines) instead of the text itself.
+        text_content = "\n".join(str(part) for part in text_content)
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}"
+    }
+
+    payload = {
+        "model": "gpt-3.5-turbo-0125",
+        "messages": [
+            {
+                "role": "user",
+                "content": f"Summarise this text for me: ... {text_content}"
+            }
+        ],
+        "max_tokens": 100
+    }
+
+    try:
+        # The timeout keeps a stalled request from hanging the page forever.
+        response = requests.post("https://api.openai.com/v1/chat/completions",
+                                 headers=headers, json=payload, timeout=60)
+        body = response.json()
+        if not response.ok:
+            # Show the API's own message instead of failing later on 'choices'.
+            detail = f"HTTP {response.status_code}"
+            if isinstance(body, dict) and isinstance(body.get("error"), dict):
+                detail = body["error"].get("message") or detail
+            st.error(f"Error: {detail}")
+            return
+        explanation = body['choices'][0]['message']['content']
+    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
+        st.error(f"Error: {e}")
+        return
+
+    st.success(f"Explanation: {explanation}")
+
+def chat_to_ai(file_name):
+    """Placeholder for chatting about one PDF: currently only writes a status line."""
+    status_line = f"Chatting about {file_name}..."
+    st.write(status_line)
+
+def get_summary(pdf_bytes, file_name):
+    """OCR every page of a PDF and request a summary of the extracted text.
+
+    Args:
+        pdf_bytes: The PDF file content as bytes.
+        file_name: Name shown in the progress message.
+
+    The per-page OCR text is written to the page and then handed to
+    ``send_text_to_openai``; nothing is returned.
+    """
+    st.write(f"Getting summary for {file_name}...")
+    pdf_texts = []  # OCR text, one entry per page
+
+    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    try:
+        for page in doc:
+            # Rasterise the page, then run OCR on the resulting image.
+            page_bytes = page.get_pixmap().tobytes()
+            page_image = Image.open(io.BytesIO(page_bytes))
+            pdf_texts.append(pytesseract.image_to_string(page_image))
+    finally:
+        # Release the document even if rendering or OCR fails part-way.
+        doc.close()
+
+    st.write(pdf_texts)
+    send_text_to_openai(pdf_texts)
+
+
+def nav_page(page_name, timeout_secs=3):
+    """Switch to another page of this multipage app from the browser side.
+
+    Injects a script that looks for a link whose URL ends in ``/<page_name>``
+    (compared case-insensitively) and clicks it, retrying every 100 ms until
+    *timeout_secs* seconds have passed, after which an alert is shown.
+
+    Args:
+        page_name: Final URL path segment of the target page.
+        timeout_secs: How long to keep retrying, in seconds.
+    """
+    # The template must contain one %s and one %d: with an empty template the
+    # %-formatting below raises "not all arguments converted".
+    nav_script = """
+        <script type="text/javascript">
+            function attempt_nav_page(page_name, start_time, timeout_secs) {
+                var links = window.parent.document.getElementsByTagName("a");
+                for (var i = 0; i < links.length; i++) {
+                    if (links[i].href.toLowerCase().endsWith("/" + page_name.toLowerCase())) {
+                        links[i].click();
+                        return;
+                    }
+                }
+                var elapsed = new Date() - start_time;
+                if (elapsed < timeout_secs * 1000) {
+                    setTimeout(attempt_nav_page, 100, page_name, start_time, timeout_secs);
+                } else {
+                    alert("Unable to navigate to page '" + page_name + "' after " + timeout_secs + " second(s).");
+                }
+            }
+            window.addEventListener("load", function() {
+                attempt_nav_page("%s", new Date(), %d);
+            });
+        </script>
+    """ % (page_name, timeout_secs)
+    html(nav_script)
+
+
+def get_existing_files():
+    """Return every record in the current user's ``documents`` collection as dicts.
+
+    Reads ``users/<username>/documents`` through the module-level ``db`` and
+    ``username``.
+    """
+    user_documents = db.collection('users').document(username).collection('documents')
+    return [snapshot.to_dict() for snapshot in user_documents.get()]
+
+
+def get_existing_file_names():
+    """Return the ``filename`` of every record in the current user's ``documents``.
+
+    Reads ``users/<username>/documents`` through the module-level ``db`` and
+    ``username``.
+    """
+    snapshots = db.collection('users').document(username).collection('documents').get()
+    records = [snapshot.to_dict() for snapshot in snapshots]
+    return [record['filename'] for record in records]
+
+
+def get_last_file():
+    """Return the current user's most recently uploaded document record.
+
+    Records are ordered by ``uploaded_at``, newest first, and only the first
+    one is fetched.
+
+    Returns:
+        The record as a dict, or ``None`` when the user has no documents
+        (previously this case raised ``IndexError``).
+    """
+    docs_ref = db.collection('users').document(username).collection('documents')
+    newest_first = docs_ref.order_by('uploaded_at', direction=firestore.Query.DESCENDING).limit(1)
+    snapshot = next(iter(newest_first.stream()), None)
+    if snapshot is None:
+        return None
+    return snapshot.to_dict()
+
+
+def check_file(file):
+    """Fetch the file's URL and show a link to it, or the failing status code."""
+    file_url = file['url']
+    response = requests.get(file_url)
+    file_label = file['filename']
+    if response.status_code != 200:
+        st.write(f"Failed: {response.status_code} file name: {file['filename']}")
+        return
+    st.markdown(f"[{file_label}]({file_url})")
+
+
+def create_thumbnail(image_stream, format):
+    """Shrink the image in *image_stream* with a (128, 128) thumbnail bound.
+
+    The result is saved in *format* at quality 95 into a new ``BytesIO``, which
+    is returned rewound to position 0.
+    """
+    thumbnail = Image.open(image_stream)
+    thumbnail.thumbnail((128, 128))
+
+    encoded = io.BytesIO()
+    thumbnail.save(encoded, format, quality=95)
+    encoded.seek(0)
+    return encoded
+
+
+# NOTE: this name is defined a second time further down this module; that later
+# definition replaces this one before the page body calls it.
+def display_file_with_thumbnail(file):
+    """Show *file* as a thumbnail that links to the file, or as a plain link."""
+    if file.get('thumbnail_url'):
+        # Markdown image nested inside a link: the thumbnail points at the file URL.
+        link = f"[![Thumbnail]({file['thumbnail_url']})]({file['url']})"
+        st.markdown(link, unsafe_allow_html=True)
+    else:
+        st.markdown(f"[{file['filename']}]({file['url']})")
+
+
+def parse_text():
+    """Placeholder: only writes a 'parsing...' status line to the page."""
+    status_line = 'parsing...'
+    st.write(status_line)
+
+
+def pdf_page_to_image(pdf_stream):
+    """Render page 0 of the PDF in *pdf_stream* and return it as PNG bytes.
+
+    Returns a ``BytesIO`` positioned at the start of the PNG data.
+    """
+    document = fitz.open("pdf", pdf_stream)
+    first_page = document.load_page(0)
+
+    # 72 / 72 is 1.0 on both axes, i.e. the page is rendered unscaled.
+    scale = fitz.Matrix(72 / 72, 72 / 72)
+    png_bytes = first_page.get_pixmap(matrix=scale).tobytes("png")
+
+    document.close()
+    return io.BytesIO(png_bytes)
+
+def pdf_parse_content(pdf_bytes):
+    """OCR a PDF, store the result in the session and in Firestore, then open the chat page.
+
+    Besides *pdf_bytes* this reads three module-level names: ``file`` (the
+    record the page loop is currently rendering), ``username`` and ``db``.
+    The chat document is stored under the file's ``doc_id``.
+    """
+    document = fitz.open(stream=pdf_bytes, filetype="pdf")
+    pdf_images = []
+    pdf_texts = []  # OCR text, one entry per page
+
+    for page in document:
+        rendered = Image.open(io.BytesIO(page.get_pixmap().tobytes()))
+        pdf_images.append(rendered)
+        pdf_texts.append(pytesseract.image_to_string(rendered))
+
+    # Hand the parsed content over to the chat page through the session state.
+    st.session_state['username'] = username
+    st.session_state['pdf_images'] = pdf_images
+    st.session_state['pdf_texts'] = pdf_texts
+    st.session_state['file_name'] = file['filename']
+    st.session_state['chat_file_name'] = file['filename']
+    st.session_state['doc_id'] = file['doc_id']
+
+    chat_id = file['doc_id']
+
+    # adding chat to db
+    user_chats = db.collection('users').document(username).collection('chats')
+    user_chats.document(chat_id).set({
+        'filename': file['filename'],
+        'pdf_text': pdf_texts,
+        'chat_id' : chat_id,
+        'file_id' : file['doc_id']
+    })
+
+    nav_page("chat_to_ai")
+
+def upload_file(uploaded_file, thumbnail_stream):
+    """Store an upload (and optional thumbnail) in the bucket and record it in Firestore.
+
+    Object names are ``<username>/<uuid4>_<original name>``.  The record keeps
+    signed GET URLs created with a 10000-minute expiration and ``str(blob)``,
+    which ``get_img_blob`` later parses to find the object again.
+
+    Returns:
+        The newly written document record, read back as a dict.
+    """
+    # NOTE(review): the stored URLs presumably stop working once the signing
+    # window has passed -- confirm whether they are refreshed anywhere.
+    file_blob = bucket.blob(f"{username}/{uuid.uuid4()}_{uploaded_file.name}")
+    file_blob.upload_from_string(uploaded_file.getvalue(), content_type=uploaded_file.type)
+
+    # Prepare the thumbnail
+    thumb_url = None
+    if thumbnail_stream:
+        thumb_blob = bucket.blob(f"{username}/{uuid.uuid4()}_thumb_{uploaded_file.name}")
+        thumb_blob.upload_from_string(thumbnail_stream.getvalue(), content_type='image/png')
+        thumb_url = thumb_blob.generate_signed_url(
+            version="v4", expiration=datetime.timedelta(minutes=10000), method='GET')
+
+    url = file_blob.generate_signed_url(
+        version="v4", expiration=datetime.timedelta(minutes=10000), method='GET')
+
+    record_ref = db.collection('users').document(username).collection('documents').document()
+    record_ref.set({
+        'filename': uploaded_file.name,
+        'content_type': uploaded_file.type,
+        'url': url,
+        'blob': str(file_blob),
+        'thumbnail_url': thumb_url,
+        'uploaded_at': firestore.SERVER_TIMESTAMP,
+        'doc_id': record_ref.id
+    })
+
+    return record_ref.get().to_dict()
+
+def delete_file(username, file_id):
+    """Delete a document record and the chat stored under the same id.
+
+    Only the two Firestore documents are removed; the storage bucket is not
+    touched here.  Progress and any error are reported with ``st.write``.
+    """
+    st.write("Trying to delete...")
+    try:
+        user_ref = db.collection('users').document(username)
+        # Chats are stored under the file's id, so one id addresses both documents.
+        user_ref.collection('chats').document(file_id).delete()
+        user_ref.collection('documents').document(file_id).delete()
+        st.write('Deleted successfully')
+    except Exception as e:
+        st.write(f"An error occurred while trying to delete the file: {e}")
+
+def display_file_with_thumbnail(file):
+    """Show the file's thumbnail with its name as caption, or a plain link without one."""
+    thumbnail_url = file.get('thumbnail_url')
+    if not thumbnail_url:
+        st.markdown(f"[{file['filename']}]({file['url']})")
+        return
+    st.image(thumbnail_url, caption=file['filename'], width=300)
+
+def upload_single_file(uploaded_file):
+    """Build a thumbnail for *uploaded_file* where possible, upload both, return the record.
+
+    Images get a shrunken copy as thumbnail, PDFs a rendering of their first
+    page; other types are uploaded without one.
+
+    Returns:
+        The document record written by ``upload_file``.
+    """
+    thumbnail_stream = None
+    if uploaded_file.type.startswith('image/'):
+        thumbnail_stream = create_thumbnail(uploaded_file, uploaded_file.type.split('/')[-1])
+    elif uploaded_file.type.startswith('application/pdf'):
+        thumbnail_stream = pdf_page_to_image(uploaded_file.getvalue())
+
+    try:
+        # upload_file already returns the record it wrote.  Using it avoids a
+        # second query and cannot pick up a different, concurrently uploaded file.
+        return upload_file(uploaded_file, thumbnail_stream)
+    finally:
+        if thumbnail_stream is not None:
+            thumbnail_stream.close()
+
+def get_img_blob(file):
+    """Download the stored object behind a document record and return its bytes.
+
+    ``file['blob']`` holds the text written by ``str(blob)`` at upload time,
+    which is expected to look like ``<Blob: bucket, name, generation>``.  The
+    object name is recovered from that text and fetched from the module-level
+    ``bucket``.
+    """
+    import re  # local import: only this function needs it
+
+    blob_repr = file['blob']
+    # The greedy middle group keeps commas that belong to the object name; a
+    # plain split(',') would cut such names at their first comma.
+    match = re.match(r"<Blob: [^,]*, (.*), (?:None|\d+)>\s*$", blob_repr, re.DOTALL)
+    if match:
+        blob_name = match.group(1).strip()
+    else:
+        # Unrecognised layout: fall back to the second comma-separated field.
+        blob_name = blob_repr.split(',')[1].strip()
+    return bucket.blob(blob_name).download_as_bytes()
+
+st.title("Documents")
+
+if st.session_state.logged_in:
+    # `api_key` and `username` are module globals read by the helpers above.
+    api_key = st.text_input("OpenAI API Key", key="file_qa_api_key", type="password")
+    username = st.session_state.username
+
+    files = get_existing_files()
+    existing_file_names = [file['filename'] for file in files]  # List of existing file names
+
+    with st.form("my-form", clear_on_submit=True):
+        uploaded_file = st.file_uploader("FILE UPLOADER")
+        st.form_submit_button("UPLOAD!")
+
+    if uploaded_file:
+        if uploaded_file.name in existing_file_names:
+            # Tell the user why nothing happened instead of ignoring the upload.
+            st.warning(f"A file named {uploaded_file.name} already exists; upload skipped.")
+        else:
+            upload_single_file(uploaded_file)
+            # st.rerun replaces the deprecated st.experimental_rerun.
+            st.rerun()
+
+    if files:
+        st.write('All files are:')
+        # The loop variable must stay named `file`: pdf_parse_content() reads it
+        # as a module global.
+        for file in files:
+            display_file_with_thumbnail(file)
+            if st.button("Delete", key=f"delete_{file['url']}"):
+                delete_file(username, file['doc_id'])  # Function to delete the file
+            file_extension = file['filename'].split(".")[-1].lower()
+            if file_extension in ["jpg", "jpeg", "png"]:
+                image_bytes = get_img_blob(file)
+                send_image_to_openai(image_bytes, api_key, key=f"chat_{file['url']}")
+            elif file_extension == "pdf":
+                # Download the PDF only when one of its buttons was clicked,
+                # not for every PDF on every rerun.
+                if st.button("Chat to AI", key=f"chat_{file['url']}"):
+                    pdf_parse_content(get_img_blob(file))
+                if st.button("Get Summary", key=f"chat_summary_{file['url']}"):
+                    get_summary(get_img_blob(file), file['filename'])
+else:
+    st.write('Register please.')
diff --git a/pages/3_Chat_To_AI.py b/pages/3_Chat_To_AI.py
new file mode 100644
index 0000000..eeb3ce1
--- /dev/null
+++ b/pages/3_Chat_To_AI.py
@@ -0,0 +1,90 @@
+import streamlit as st
+from firebase_admin import firestore, storage
+import streamlit as st
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains.question_answering import load_qa_chain
+from langchain.llms import OpenAI
+from langchain.callbacks import get_openai_callback
+import datetime
+
+db = firestore.client()
+
+# FUNCTIONS
+def response_func(prompt, text):
+    """Answer *prompt* from *text* using embedding search plus a QA chain.
+
+    Args:
+        prompt: The user's question.
+        text: The document text, either one string or a list/tuple of strings
+            (for example one entry per page, as stored in a chat's ``pdf_text``).
+
+    Returns:
+        The chain's answer, or a short notice when *text* contains nothing to
+        search.
+
+    Uses the module-level ``api_key`` for both embeddings and the LLM.
+    """
+    if isinstance(text, (list, tuple)):
+        # Join pages with real newlines.  str(list) would produce the list's
+        # repr, in which newlines appear escaped, so the "\n" separator below
+        # would never match and the whole document would stay one chunk.
+        text = "\n".join(str(page) for page in text)
+    else:
+        text = str(text)
+
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    if not chunks:
+        # Nothing to index (e.g. OCR produced no text): skip the API calls.
+        return "No text could be extracted from this document."
+
+    embeddings = OpenAIEmbeddings(openai_api_key = api_key)
+    knowledge_base = FAISS.from_texts(chunks, embeddings)
+    docs = knowledge_base.similarity_search(prompt)
+    llm = OpenAI(openai_api_key = api_key)
+    chain = load_qa_chain(llm, chain_type="stuff")
+    return chain.run(input_documents=docs, question=prompt)
+
+
+def display_messages(chat_id, username):
+    """Replay one chat's stored messages, ordered by ``timestamp``, as chat bubbles.
+
+    Each stored message may carry a user part (``message_user``) and an
+    assistant part (``message_ai``); empty or missing parts are skipped.
+    """
+    # Fetch messages from Firestore
+    chat_ref = db.collection('users').document(username).collection('chats').document(chat_id)
+    history = chat_ref.collection('messages').order_by("timestamp").stream()
+
+    # Display messages using Streamlit's chat message format
+    for snapshot in history:
+        fields = snapshot.to_dict()
+        for field_name, role in (('message_user', "user"), ('message_ai', "assistant")):
+            content = fields.get(field_name)
+            if content:
+                with st.chat_message(role):
+                    st.markdown(content)
+
+# Default to "not logged in" on the first visit of a session.
+if 'logged_in' not in st.session_state:
+    st.session_state.logged_in = False
+
+st.title("Chat To AI")
+
+# MAIN SCRIPT
+if st.session_state.logged_in:
+    # `api_key` is a module global read by response_func().
+    api_key = st.text_input("OpenAI API Key", key="file_docs_api_key", type="password")
+    if 'username' in st.session_state:
+        username = st.session_state['username']
+
+        # Load every chat of this user and let the sidebar pick one by file name.
+        chats_ref = db.collection('users').document(username).collection('chats')
+        chats_all = [snapshot.to_dict() for snapshot in chats_ref.get()]
+        chat_names = [chat['filename'] for chat in chats_all if 'filename' in chat]
+        selected_chat_name = st.sidebar.radio("Select a Chat:", chat_names)
+
+        # First chat whose file name matches the selection, if any.
+        selected_chat_data = None
+        for candidate in chats_all:
+            if candidate['filename'] == selected_chat_name:
+                selected_chat_data = candidate
+                break
+
+        if selected_chat_data:
+            st.write(f"Starting chat session FOR: {selected_chat_data['filename']}")
+            st.write(f"The id in the selected file is: {selected_chat_data['chat_id']}")
+            display_messages(selected_chat_data['chat_id'], username)
+            if prompt := st.chat_input("What is up?"):
+                chat_id = selected_chat_data['chat_id']
+                with st.chat_message("user"):
+                    st.markdown(prompt)
+                answer = response_func(prompt, selected_chat_data['pdf_text'])
+                with st.chat_message("assistant"):
+                    st.markdown(answer)
+                # Persist the exchange so display_messages() can replay it later.
+                message_ref = chats_ref.document(chat_id).collection('messages').document()
+                message_ref.set({
+                    'message_user': prompt,
+                    'message_ai' : answer,
+                    'timestamp': datetime.datetime.now(datetime.timezone.utc).isoformat()
+                })
+
+else:
+    st.write('Please register or login to continue.')
+
+
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..8635be6
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,5 @@
+black==23.3.0
+mypy==1.4.1
+pre-commit==3.3.3
+watchdog
+pytest
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d55880c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,14 @@
+streamlit>=1.28
+langchain>=0.0.217
+openai>=1.2
+duckduckgo-search
+anthropic>=0.3.0
+trubrics>=1.4.3
+pytesseract==0.3.8
+pyMuPDF==1.24.2
+pytest-shutil
+tiktoken
+faiss-cpu
+google-cloud-firestore
+firebase-admin
+pdf2image==1.17.0
\ No newline at end of file
diff --git a/test/test_firebaseStorage.py b/test/test_firebaseStorage.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/test_firebasedata.py b/test/test_firebasedata.py
new file mode 100644
index 0000000..ea3f7c4
--- /dev/null
+++ b/test/test_firebasedata.py
@@ -0,0 +1,15 @@
+import streamlit as st
+from google.cloud import firestore
+
+import os
+
+# Authenticate to Firestore with the JSON account key.
+# The key path comes from $GOOGLE_APPLICATION_CREDENTIALS when that is set, so
+# the script is not tied to one machine; otherwise the original location is used.
+key_path = os.environ.get(
+    "GOOGLE_APPLICATION_CREDENTIALS",
+    r"C:\Users\sasha\PycharmProjects\elmento\elmento-secret.json",
+)
+db = firestore.Client.from_service_account_json(key_path)
+
+# Create a reference to the Google post.
+doc_ref = db.collection("posts").document("Google")
+
+# Then get the data at that reference.
+doc = doc_ref.get()
+
+# Let's see what we got!
+st.write("The id is: ", doc.id)
+st.write("The contents are: ", doc.to_dict())
\ No newline at end of file