cleanup

shinezyy · Feb 13, 2023 · b572a0e · b572a0e
1 parent b1587db
commit b572a0e
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,39 @@
+# ResearchGPT
+
+This is a Flask app that provides an interface for having a conversation with a research paper. You can enter a link to a
+PDF hosted online or upload your own PDF. The app will then extract the text from the PDF, create embeddings from the text, and use them with the OpenAI API to generate a response to a question you ask. It will also return the part of the text it used to generate the response, along with its page number.
+
+You can try a demo here: http://researchgpt.ue.r.appspot.com
+
+## Installation
+
+```bash
+git clone https://github.com/mukulpatnaik/researchgpt.git
+cd researchgpt
+pip install -r requirements.txt
+```
+
+## Usage
+
+```bash
+python app.py
+```
+
+## Google Cloud Deployment
+
+Follow the instructions here: https://cloud.google.com/appengine/docs/standard/python3/building-app/deploying-web-service
+Once you have the app.yaml file set up with your OpenAI API key and the gcloud CLI set up, you can deploy with:
+
+```bash
+gcloud app deploy
+```
+
+To stream logs:
+
+```bash
+gcloud app logs tail
+```
+
+## Example Screenshot
+
+![Example Screenshot](demo.png)
diff --git a/demo.png b/demo.png
diff --git a/main.py b/main.py
@@ -19,7 +19,7 @@
 
 class Chatbot():
 
-    def parse_paper(self, pdf):
+    def extract_text(self, pdf):
         print("Parsing paper")
         number_of_pages = len(pdf.pages)
         print(f"Total number of pages: {number_of_pages}")
@@ -71,7 +71,7 @@ def visitor_body(text, cm, tm, fontDict, fontSize):
         # print(paper_text)
         return paper_text
 
-    def paper_df(self, pdf):
+    def create_df(self, pdf):
         print('Creating dataframe')
         filtered_pdf= []
         for row in pdf:
@@ -86,7 +86,7 @@ def paper_df(self, pdf):
         print('Done creating dataframe')
         return df
 
-    def calculate_embeddings(self, df):
+    def embeddings(self, df):
         print('Calculating embeddings')
         openai.api_key = os.getenv('OPENAI_API_KEY')
         embedding_model = "text-embedding-ada-002"
@@ -95,7 +95,7 @@ def calculate_embeddings(self, df):
         print('Done calculating embeddings')
         return df
 
-    def search_embeddings(self, df, query, n=3, pprint=True):
+    def search(self, df, query, n=3, pprint=True):
         query_embedding = get_embedding(
             query,
             engine="text-embedding-ada-002"
@@ -172,9 +172,9 @@ def process_pdf():
 
     pdf = PdfReader(BytesIO(file))
     chatbot = Chatbot()
-    paper_text = chatbot.parse_paper(pdf)
-    df = chatbot.paper_df(paper_text)
-    df = chatbot.calculate_embeddings(df)
+    paper_text = chatbot.extract_text(pdf)
+    df = chatbot.create_df(paper_text)
+    df = chatbot.embeddings(df)
 
     # Create a new blob and upload the file's content.
     blob = bucket.blob(name)
@@ -204,9 +204,9 @@ def download_pdf():
         return {"key": key}
 
     pdf = PdfReader(BytesIO(r.content))
-    paper_text = chatbot.parse_paper(pdf)
-    df = chatbot.paper_df(paper_text)
-    df = chatbot.calculate_embeddings(df)
+    paper_text = chatbot.extract_text(pdf)
+    df = chatbot.create_df(paper_text)
+    df = chatbot.embeddings(df)
 
     # Create a new blob and upload the file's content.
     blob = bucket.blob(name)