Skip to content

Commit

Permalink
several fixes, removed logging
Browse files Browse the repository at this point in the history
  • Loading branch information
itsOwen committed Aug 24, 2024
1 parent 583638c commit 65cb2b8
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 124 deletions.
16 changes: 12 additions & 4 deletions app/ui_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,32 @@ def display_info_icons():
if time.time() - st.session_state.info_icons_time > 10 or ("messages" in st.session_state and len(st.session_state.messages) > 0):
st.session_state.info_icons_displayed = False

def extract_data_from_markdown(text: Union[str, bytes, io.BytesIO]) -> Union[str, bytes, None]:
def extract_data_from_markdown(text: Union[str, bytes, io.BytesIO]) -> Union[str, bytes, io.BytesIO, None]:
if isinstance(text, io.BytesIO):
return text
if isinstance(text, bytes):
text = text.decode('utf-8')
pattern = r'```(csv|excel)\n(.*?)\n```'
match = re.search(pattern, text, re.DOTALL)
if match:
return match.group(2).strip()
data_type = match.group(1)
data = match.group(2).strip()
if data_type == 'excel':
return io.BytesIO(data.encode())
return data
return None

def format_data(data: Union[str, bytes, io.BytesIO], format_type: str):
try:
if isinstance(data, io.BytesIO):
if format_type == 'excel':
return pd.read_excel(data, engine='openpyxl')
data.seek(0)
return pd.read_excel(data, engine='openpyxl')
return pd.read_csv(data)
elif isinstance(data, bytes):
return pd.read_excel(io.BytesIO(data), engine='openpyxl')
if format_type == 'excel':
return pd.read_excel(io.BytesIO(data), engine='openpyxl')
return pd.read_csv(io.BytesIO(data))
else:
if format_type == 'csv':
csv_data = []
Expand Down
183 changes: 126 additions & 57 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import streamlit as st
import json
import asyncio
import logging
from app.streamlit_web_scraper_chat import StreamlitWebScraperChat
from app.ui_components import display_info_icons, display_message, extract_data_from_markdown, format_data
from app.utils import loading_animation, get_loading_message
Expand All @@ -11,6 +10,8 @@
import base64
from google_auth_oauthlib.flow import Flow
import io
from io import BytesIO
import re
from src.utils.google_sheets_utils import SCOPES, get_redirect_uri, display_google_sheets_button, initiate_google_auth

def handle_oauth_callback():
Expand All @@ -28,38 +29,98 @@ def handle_oauth_callback():
except Exception as e:
st.error(f"Error during OAuth callback: {str(e)}")

def serialize_bytesio(obj):
if isinstance(obj, BytesIO):
return {
"_type": "BytesIO",
"data": base64.b64encode(obj.getvalue()).decode('utf-8')
}
raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")

def deserialize_bytesio(obj):
if isinstance(obj, dict) and "_type" in obj and obj["_type"] == "BytesIO":
return BytesIO(base64.b64decode(obj["data"]))
return obj

def save_chat_history(chat_history):
with open("chat_history.json", "w") as f:
json.dump(chat_history, f, default=serialize_bytesio)

def load_chat_history():
try:
with open("chat_history.json", "r") as f:
return json.load(f, object_hook=deserialize_bytesio)
except FileNotFoundError:
return {}

def safe_process_message(web_scraper_chat, message):
if message is None or message.strip() == "":
return "I'm sorry, but I didn't receive any input. Could you please try again?"
try:
response = web_scraper_chat.process_message(message)
if isinstance(response, tuple) and len(response) == 2 and isinstance(response[1], pd.DataFrame):
csv_string, df = response
st.text("CSV Data:")
st.code(csv_string, language="csv")
st.text("Interactive Table:")
st.dataframe(df)
return csv_string
st.write("Debug: Response type:", type(response))

if isinstance(response, tuple):
st.write("Debug: Response is a tuple")
if len(response) == 2 and isinstance(response[1], pd.DataFrame):
st.write("Debug: CSV data detected")
csv_string, df = response
st.text("CSV Data:")
st.code(csv_string, language="csv")
st.text("Interactive Table:")
st.dataframe(df)

csv_buffer = BytesIO()
df.to_csv(csv_buffer, index=False)
csv_buffer.seek(0)
st.download_button(
label="Download CSV",
data=csv_buffer,
file_name="data.csv",
mime="text/csv"
)

return csv_string
elif len(response) == 2 and isinstance(response[0], BytesIO):
st.write("Debug: Excel data detected")
excel_buffer, df = response
st.text("Excel Data:")
st.dataframe(df)

excel_buffer.seek(0)
st.download_button(
label="Download Original Excel file",
data=excel_buffer,
file_name="data_original.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)

excel_data = BytesIO()
with pd.ExcelWriter(excel_data, engine='xlsxwriter') as writer:
df.to_excel(writer, index=False, sheet_name='Sheet1')
excel_data.seek(0)

st.download_button(
label="Download Excel (from DataFrame)",
data=excel_data,
file_name="data_from_df.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)

return ("Excel data displayed and available for download.", excel_buffer)
else:
st.write("Debug: Response is not a tuple")

return response
except AttributeError as e:
if "'NoneType' object has no attribute 'lower'" in str(e):
return "I encountered an issue while processing your request. It seems like I received an unexpected empty value. Could you please try rephrasing your input?"
else:
raise e
except Exception as e:
st.write("Debug: Exception occurred:", str(e))
return f"An unexpected error occurred: {str(e)}. Please try again or contact support if the issue persists."

def load_chat_history():
try:
with open("chat_history.json", "r") as f:
return json.load(f)
except FileNotFoundError:
return {}

def save_chat_history(chat_history):
with open("chat_history.json", "w") as f:
json.dump(chat_history, f)

def get_date_group(date_str):
date = datetime.strptime(date_str, "%Y-%m-%d")
today = datetime.now().date()
Expand Down Expand Up @@ -95,13 +156,6 @@ async def list_ollama_models():
st.error(f"Error fetching Ollama models: {str(e)}")
return []

def setup_logging(enable_logging):
if enable_logging:
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
return logging.getLogger(__name__)
else:
return logging.getLogger(__name__)

def load_css():
with open("app/styles.css", "r") as f:
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
Expand All @@ -124,19 +178,49 @@ def render_message(role, content, avatar_path):

def display_message_with_sheets_upload(message, message_index):
content = message["content"]
if isinstance(content, (str, bytes, io.BytesIO)):
if isinstance(content, (str, bytes, BytesIO)):
data = extract_data_from_markdown(content)
if data is not None:
if isinstance(data, io.BytesIO) or (isinstance(content, str) and 'excel' in content.lower()):
df = format_data(data, 'excel')
else:
df = format_data(data, 'csv')

if df is not None:
st.dataframe(df)
display_google_sheets_button(df)
else:
st.warning("Failed to display data as a table. Showing raw content:")
try:
is_excel = isinstance(data, BytesIO) or (isinstance(content, str) and 'excel' in content.lower())
if is_excel:
df = format_data(data, 'excel')
else:
df = format_data(data, 'csv')

if df is not None:
st.dataframe(df)

if not is_excel:
csv_buffer = BytesIO()
df.to_csv(csv_buffer, index=False)
csv_buffer.seek(0)
st.download_button(
label="📥 Download as CSV",
data=csv_buffer,
file_name="data.csv",
mime="text/csv",
key=f"csv_download_{message_index}"
)
else:
excel_buffer = BytesIO()
with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
df.to_excel(writer, index=False, sheet_name='Sheet1')
excel_buffer.seek(0)
st.download_button(
label="📥 Download as Excel",
data=excel_buffer,
file_name="data.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
key=f"excel_download_{message_index}"
)

display_google_sheets_button(df, f"sheets_upload_{message_index}")
else:
st.warning("Failed to display data as a table. Showing raw content:")
st.code(content)
except Exception as e:
st.error(f"Error processing data: {str(e)}")
st.code(content)
else:
st.markdown(content)
Expand All @@ -159,12 +243,6 @@ def main():
user_avatar_path = "app/icons/man.png"
ai_avatar_path = "app/icons/skull.png"

if 'enable_logging' not in st.session_state:
st.session_state.enable_logging = False

logger = setup_logging(st.session_state.enable_logging)
logger.debug("Starting CyberScraper 2077")

if 'chat_history' not in st.session_state:
st.session_state.chat_history = load_chat_history()
if 'current_chat_id' not in st.session_state or st.session_state.current_chat_id not in st.session_state.chat_history:
Expand All @@ -186,12 +264,6 @@ def main():
with st.sidebar:
st.title("Conversation History")

st.session_state.enable_logging = st.toggle("Enable Logging", st.session_state.enable_logging)
if st.session_state.enable_logging:
st.info("Logging is enabled. Check your console for log messages.")
else:
st.info("Logging is disabled.")

# Model selection
st.subheader("Select Model")
default_models = ["gpt-4o-mini", "gpt-3.5-turbo"]
Expand Down Expand Up @@ -296,14 +368,9 @@ def main():
prompt = st.chat_input("Enter the URL to scrape or ask a question regarding the data", key="user_input")

if prompt:
if st.session_state.enable_logging:
logger.debug(f"Received prompt: {prompt}")
st.session_state.chat_history[st.session_state.current_chat_id]["messages"].append({"role": "user", "content": prompt})
save_chat_history(st.session_state.chat_history)

if not st.session_state.web_scraper_chat:
if st.session_state.enable_logging:
logger.debug("Initializing web_scraper_chat")
st.session_state.web_scraper_chat = initialize_web_scraper_chat()

with st.chat_message("assistant"):
Expand All @@ -313,12 +380,14 @@ def main():
st.session_state.web_scraper_chat,
prompt
)
st.write("Debug: Full response type:", type(full_response))
if full_response is not None:
st.session_state.chat_history[st.session_state.current_chat_id]["messages"].append({"role": "assistant", "content": full_response})
if isinstance(full_response, tuple) and len(full_response) == 2 and isinstance(full_response[1], BytesIO):
st.session_state.chat_history[st.session_state.current_chat_id]["messages"].append({"role": "assistant", "content": full_response[0]})
else:
st.session_state.chat_history[st.session_state.current_chat_id]["messages"].append({"role": "assistant", "content": full_response})
save_chat_history(st.session_state.chat_history)
except Exception as e:
if st.session_state.enable_logging:
logger.error(f"An unexpected error occurred: {str(e)}")
st.error(f"An unexpected error occurred: {str(e)}")

st.rerun()
Expand Down
13 changes: 2 additions & 11 deletions src/ollama_models.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
import requests
from typing import List, Dict, Any
import logging
import os
import json

class OllamaModel:
def __init__(self, model_name: str):
self.model_name = model_name
self.logger = logging.getLogger(__name__)
self.logger.setLevel(logging.DEBUG)
self.base_url = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')

async def generate(self, prompt: str, system_prompt: str = "") -> str:
self.logger.debug(f"Generating with Ollama model: {self.model_name}")
self.logger.debug(f"Prompt (first 500 chars): {prompt[:500]}...")
try:
response = requests.post(
f"{self.base_url}/api/generate",
Expand All @@ -35,26 +30,22 @@ async def generate(self, prompt: str, system_prompt: str = "") -> str:
if 'response' in data:
full_response += data['response']
except json.JSONDecodeError:
self.logger.warning(f"Failed to parse JSON: {line}")
print(f"Error decoding JSON: {line}")

self.logger.debug(f"Ollama response (first 500 chars): {full_response[:500]}...")
return full_response
except Exception as e:
self.logger.error(f"Error generating with Ollama: {str(e)}")
print(f"An error occurred: {str(e)}")
raise

@staticmethod
async def list_models() -> List[str]:
logger = logging.getLogger(__name__)
base_url = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
try:
response = requests.get(f"{base_url}/api/tags")
response.raise_for_status()
models = response.json()
logger.debug(f"Available Ollama models: {models['models']}")
return [model['name'] for model in models['models']]
except Exception as e:
logger.error(f"Error listing Ollama models: {str(e)}")
return []

class OllamaModelManager:
Expand Down
Loading

0 comments on commit 65cb2b8

Please sign in to comment.