Rag branch #9

Open · wants to merge 1 commit into main

129 changes: 129 additions & 0 deletions rag.py
@@ -0,0 +1,129 @@
"""Simple RAG:

1. Get question from user
2. Use answer to search Postgres database
3. Format rows in an LLM-compatible format
4. Send question and formatted rows to LLM
"""

import os

import numpy as np
import psycopg2
from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
from pgvector.psycopg2 import register_vector

# Set up PostgreSQL database
load_dotenv(override=True)
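# Expects DBUSER, DBPASS, DBHOST, and DBNAME in the environment or a .env file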
DBUSER = os.environ["DBUSER"]
DBPASS = os.environ["DBPASS"]
DBHOST = os.environ["DBHOST"]
DBNAME = os.environ["DBNAME"]

conn = psycopg2.connect(database=DBNAME, user=DBUSER, password=DBPASS, host=DBHOST)
conn.autocommit = True
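# register_vector registers the pgvector `vector` type with psycopg2, so numpy
# arrays can be passed as query parameters for the embedding column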
register_vector(conn)
cur = conn.cursor()

# Set up Azure AI Inference settings (GitHub Models endpoint; GITHUB_TOKEN serves as the API key)
token = os.environ["GITHUB_TOKEN"]
endpoint = "https://models.inference.ai.azure.com"


# Get question from user (hardcoded here; Spanish for "videos about extensions?")
question = "videos sobre extensiones?"

# Convert question to an embedding
client = EmbeddingsClient(endpoint=endpoint, credential=AzureKeyCredential(token))

# dimensions=256 must match the size of the embedding vector column in the videos table
response = client.embed(input=[question], model="text-embedding-3-small", dimensions=256)

embedding = response.data[0].embedding

# Search the database for the most similar embeddings
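# Hybrid search: a vector similarity search (pgvector's <=> cosine distance
# operator) and a full-text keyword search are merged with Reciprocal Rank
# Fusion (RRF): each result scores sum(1 / (k + rank)) across the two lists,
# with k=60 as the conventional smoothing constant.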
query = """
WITH semantic_search AS (
SELECT id, RANK () OVER (ORDER BY embedding <=> %(embedding)s) AS rank
FROM videos
ORDER BY embedding <=> %(embedding)s
LIMIT 20
),
keyword_search AS (
SELECT id, RANK () OVER (ORDER BY ts_rank_cd(to_tsvector('english', description), query) DESC)
FROM videos, phraseto_tsquery('english', %(query)s) query
WHERE to_tsvector('english', description) @@ query
ORDER BY ts_rank_cd(to_tsvector('english', description), query) DESC
LIMIT 20
)
SELECT
COALESCE(semantic_search.id, keyword_search.id) AS id,
COALESCE(1.0 / (%(k)s + semantic_search.rank), 0.0) +
COALESCE(1.0 / (%(k)s + keyword_search.rank), 0.0) AS score
FROM semantic_search
FULL OUTER JOIN keyword_search ON semantic_search.id = keyword_search.id
ORDER BY score DESC
LIMIT 5
"""
cur.execute(query, {"embedding": np.array(embedding), "query": question, "k": 60})

# Now fetch rows for each ID
rows = []
for row in cur.fetchall():
    cur.execute("SELECT id, title, description FROM videos WHERE id = %s", (row[0],))
    rows.append(cur.fetchone())
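
# NOTE: this issues one query per ID (an N+1 pattern); a single query with
# "WHERE id = ANY(%s)" could fetch all five rows at once.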

# Alternative: vector-only search (kept for reference)
# query = """
#     SELECT title, description
#     FROM videos
#     ORDER BY embedding <=> %(embedding)s
#     LIMIT 20
# """
# cur.execute(query, {"embedding": np.array(embedding)})

# Alternative: full-text (keyword-only) search of the database (kept for reference)
# cur.execute("""
#     SELECT title, description, RANK () OVER (ORDER BY ts_rank_cd(to_tsvector('english', description), query) DESC)
#     FROM videos, plainto_tsquery('english', %s) query
#     WHERE to_tsvector('english', description) @@ query
#     ORDER BY ts_rank_cd(to_tsvector('english', description), query) DESC
#     LIMIT 20
# """, (question,))
# cur.execute(
#     "SELECT * FROM videos WHERE to_tsvector(title || ' ' || description) @@ websearch_to_tsquery(%s)", (question,)
# )
# rows = cur.fetchall()

# Format rows in an LLM-compatible format
formatted_rows = ""
for row in rows:
    formatted_rows += f"## {row[1]}\n\n{row[2]}\n\n"  # row = (id, title, description)
print(formatted_rows)

# Send question and formatted rows to LLM

client = ChatCompletionsClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(token),
)

response = client.complete(
    messages=[
        SystemMessage(
            content="You answer questions about VS Code videos. You must answer only according to the sources, which will be provided in Markdown, each starting with '## ' followed by the video title."
        ),
        UserMessage(content=question + "\n\nSources:\n\n" + formatted_rows),
    ],
    model="meta-llama-3.1-405b-instruct",
    temperature=1.0,
    max_tokens=1000,
    top_p=1.0,
)
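
# NOTE: temperature=1.0 allows varied phrasing; a lower value (e.g. 0.2) would
# make the answer more deterministic.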

print("Answer:")
print(response.choices[0].message.content)
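
# Close database resources
cur.close()
conn.close()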
3 changes: 2 additions & 1 deletion requirements.txt
@@ -3,4 +3,5 @@ python-dotenv==1.0.1
SQLAlchemy[asyncio]==2.0.27
pgvector==0.2.5
SQLModel==0.0.16
asyncpg==0.29.0
azure-ai-inference
Binary file added video_backup.dump
Binary file not shown.