Install dependencies using the following commands:
git clone https://github.com/wandb/rag
cd rag/finance_multi_modal_rag
pip install -U pip uv
uv sync
Next, you need to activate the virtual environment:
source .venv/bin/activate
Finally, depending on which model you use, you may need an API key (for example, a Cohere API key); set it in your environment before running the examples below.
First, you need to fetch the 10-Q filings from the EDGAR database and generate image descriptions using meta-llama/Llama-3.2-90B-Vision-Instruct.
import weave
from edgar import set_identity
from finance_multi_modal_rag.data_loading import EdgarDataLoader
from finance_multi_modal_rag.llm_wrapper import MultiModalPredictor
def load_data(company_name: str, forms: list[str]):
    """Fetch SEC filings for *company_name* and publish them as a Weave dataset.

    Args:
        company_name: Company identifier understood by ``EdgarDataLoader``
            (e.g. a ticker such as ``"TSLA"``).
        forms: Filing form types to fetch (e.g. ``["10-Q"]``).

    Returns:
        The combined list of filing records across all requested forms.
    """
    # One predictor instance is shared across all forms; it generates the
    # image descriptions for figures embedded in the filings.
    predictor = MultiModalPredictor(
        model_name="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
        base_url="http://195.242.25.198:8032/v1",
    )
    filings_data: list = []
    for form_type in forms:
        loader = EdgarDataLoader(
            company_name=company_name, image_description_generator=predictor
        )
        filings_data.extend(loader.load_data(form_type))
    # Persist the combined records as a versioned Weave dataset.
    weave.publish(
        weave.Dataset(name=f"{company_name}_sec_filings", rows=filings_data)
    )
    return filings_data
if __name__ == "__main__":
    # EDGAR's API requires callers to declare an identity (name + e-mail).
    set_identity("<YOUR-NAME> <YOUR-EMAIL-ID>")
    # All traced calls below are logged to this Weave project.
    weave.init(project_name="finance_multi_modal_rag")
    load_data("TSLA", ["10-Q"])
Next, we generate the chunks from our documents using the following code:
import weave
from dotenv import load_dotenv

from finance_multi_modal_rag.chunking import chunk_documents

# Addresses of the source (published filings) and target (chunked) datasets.
SOURCE_DATASET_ADDRESS = "TSLA_sec_filings:v8"
TARGET_DATASET_NAME = "TSLA_sec_filings_chunks"

# Pull API keys and other settings from a local .env file.
load_dotenv()
weave.init(project_name="finance_multi_modal_rag")

# Split each filing into retrieval-sized chunks and publish the result
# as a new Weave dataset.
chunk_documents(
    source_dataset_address=SOURCE_DATASET_ADDRESS,
    target_dataset_name=TARGET_DATASET_NAME,
)
Finally, we can generate our responses using the following code:
import weave
from dotenv import load_dotenv

from finance_multi_modal_rag.llm_wrapper import MultiModalPredictor
from finance_multi_modal_rag.response_generation import FinanceQABot
from finance_multi_modal_rag.retrieval import BGERetriever

# Pull API keys and other settings from a local .env file.
load_dotenv()
weave.init(project_name="finance_multi_modal_rag")

# Vision-language model that generates the final answer.
predictor = MultiModalPredictor(
    model_name="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
    base_url="http://195.242.25.198:8032/v1",
)
# Dense retriever over the chunked filings dataset.
retriever = BGERetriever(
    weave_chunked_dataset_address="TSLA_sec_filings_chunks:v1",
    model_name="BAAI/bge-small-en-v1.5",
)
finance_qa_bot = FinanceQABot(
    predictor=predictor,
    retriever=retriever,
    weave_corpus_dataset_address="TSLA_sec_filings:v8",
)
finance_qa_bot.predict(query="what did elon say in the tweets that tesla reported?")