Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
8e2e7ea
Added 2.1
jacobthebanana May 27, 2025
aa440c4
Added 2.1 langfuse oai agents sdk integration.
jacobthebanana May 27, 2025
55af6ae
3.1 Added HF to Langfuse Data loading logic
jacobthebanana Jun 5, 2025
2477f71
Renamed modules.
jacobthebanana Jun 5, 2025
fc9dd86
Added 3.1 LLM-as-a-Judge
jacobthebanana Jun 5, 2025
e5fbf17
Set logfire scrubbing to False.
jacobthebanana Jun 5, 2025
ba6cd33
Whitespace fixes.
jacobthebanana Jun 5, 2025
8ba49d4
Revised 3.1 llm judge eval logic for robustness
jacobthebanana Jun 5, 2025
e6d2c81
Revised 3.1 llm judge README
jacobthebanana Jun 5, 2025
e7fde72
Implemented 3.2 synthetic data via agent pipeline
jacobthebanana Jun 9, 2025
ba84f97
Added Weaviate knowledge base integration
fcogidi Jun 9, 2025
b570673
Merge branch 'basic_ref_implementations' of https://github.com/Vector…
fcogidi Jun 9, 2025
e19658e
Add Weaviate client dependency to pyproject.toml
fcogidi Jun 10, 2025
0bd3ed4
Use weaviate cloud instance for testing
fcogidi Jun 10, 2025
eebb31b
Add Gradio Weaviate integration for ReAct knowledge retrieval agent
fcogidi Jun 10, 2025
c389322
Refactor import statement for improved clarity
fcogidi Jun 10, 2025
dc35590
Rename gradio.py to avoid conflict with installed gradio package
fcogidi Jun 10, 2025
c46d2e9
Update README and code to replace ElasticSearch with Weaviate integra…
fcogidi Jun 26, 2025
06eb164
Update .env.example to remove export statements for environment varia…
fcogidi Jun 30, 2025
203c2f4
Update dependencies
fcogidi Jul 2, 2025
3423fed
Update embedding model name to include full identifier
fcogidi Jul 2, 2025
0972752
Switched 1.1 and 2.1 to GCP/Gemini
jacobthebanana Jul 8, 2025
8e15ed5
Added 1.0 search demo
jacobthebanana Jul 8, 2025
8036fe8
Fixed timeout_secs "possibly unbound"
jacobthebanana Jul 8, 2025
052b5ac
Remove `create_wikipedia_embeddings.py`
fcogidi Jul 8, 2025
f2921ba
Upgrade dependencies, refactor code to update Langfuse client usage, …
fcogidi Jul 8, 2025
d2241ae
Added descriptions and examples to 1.0
jacobthebanana Jul 8, 2025
7868e4c
Remove source .env command from README files for dataset upload and s…
fcogidi Jul 8, 2025
272902c
Update ipython dependency
fcogidi Jul 8, 2025
14871a3
Updated and simplified 1.1
jacobthebanana Jul 8, 2025
c5ea6b8
Update pre-commit hooks and fix raised issues
fcogidi Jul 8, 2025
3874e35
Update LangFuse host URL in .env.example
fcogidi Jul 8, 2025
1789128
Update Langfuse host URL to US region in env_vars.py
fcogidi Jul 8, 2025
e835913
Refactor evaluation logic in run_eval.py
fcogidi Jul 8, 2025
8ee5b1c
Updated 2.1
jacobthebanana Jul 8, 2025
dbe2a3f
Merge remote-tracking branch 'origin/fco/ref_impls' into gemini_polish
jacobthebanana Jul 8, 2025
c2e61ea
Add random date generation for current events API request
fcogidi Jul 8, 2025
abb747c
[WIP] Refactor synthetic data generation to use knowledge base agent
fcogidi Jul 8, 2025
05ae5ae
Fix formatting issues in pre-commit config and run_eval.py
fcogidi Jul 8, 2025
d3c12cd
Increase height of chatbot component in Gradio interface
fcogidi Jul 9, 2025
d6c0ff0
Refactor tracing updates in agent interactions and increase chatbot h…
fcogidi Jul 9, 2025
27eaba6
Add multi-agent planner-researcher setup with Gradio interface
fcogidi Jul 9, 2025
4084828
Update module docstring to reflect multi-agent planner-researcher setup
fcogidi Jul 9, 2025
9174a5c
Implemented backoff and concurrency limiting for Weaviate KB
jacobthebanana Jul 9, 2025
6358e5d
Updated to langfuse v3
jacobthebanana Jul 9, 2025
60cda50
Remove agent disable tracing
fcogidi Jul 9, 2025
1889114
Add docstring for Knowledge Base Search Demo using Gradio
fcogidi Jul 9, 2025
c479c1a
Apply pre-commit fixes
fcogidi Jul 9, 2025
be272bc
Updated langfuse to v3 for 3.1
jacobthebanana Jul 9, 2025
d3ac4c6
Updated run_eval to specify input and output in root_span.
jacobthebanana Jul 9, 2025
905457f
Update README.md
fcogidi Jul 9, 2025
4b1b86d
Updated README.md
jacobthebanana Jul 9, 2025
3154d55
Updated README.md
jacobthebanana Jul 9, 2025
6717757
Updated README.md
jacobthebanana Jul 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
#!/bin/bash
# OpenAI-compatible LLM
export OPENAI_API_BASE="https://api.openai.com/v1"
export OPENAI_API_KEY="sk-proj-..."
OPENAI_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/"
OPENAI_API_KEY="..."

# Embedding model
EMBEDDING_BASE_URL="https://..."
EMBEDDING_API_KEY="..."

# LangFuse
export LANGFUSE_PUBLIC_KEY="pk-lf-..."
export LANGFUSE_SECRET_KEY="sk-lf-..."
export LANGFUSE_HOST="https://cloud.langfuse.com"
LANGFUSE_SECRET_KEY="sk-lf-..."
LANGFUSE_PUBLIC_KEY="pk-lf-..."
LANGFUSE_HOST="https://us.cloud.langfuse.com"

# ElasticSearch
export ES_HOSTS="https://..."
export ES_API_KEY="..."
# Weaviate
WEAVIATE_HTTP_HOST="...weaviate.cloud" # or 'localhost' for local Weaviate
WEAVIATE_GRPC_HOST="grpc-...weaviate.cloud" # or 'localhost' for local Weaviate
WEAVIATE_API_KEY="..."
WEAVIATE_HTTP_PORT="443" # or 8080 for localhost
WEAVIATE_GRPC_PORT="443" # or 50051 for localhost
WEAVIATE_HTTP_SECURE="true" # set to false for localhost
WEAVIATE_GRPC_SECURE="true" # set to false for localhost
34 changes: 4 additions & 30 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,21 @@ repos:
- id: check-toml

- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.7.2
rev: 0.7.19
hooks:
- id: uv-lock

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: 'v0.11.8'
rev: 'v0.12.2'
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
types_or: [python, jupyter]
- id: ruff-format
types_or: [python, jupyter]

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.15.0
hooks:
- id: mypy
entry: python3 -m mypy --config-file pyproject.toml
language: system
types: [python]
exclude: "tests"

- repo: https://github.com/crate-ci/typos
rev: v1 # v1.19.0
rev: v1.34.0
hooks:
- id: typos
args: []
Expand All @@ -54,23 +45,6 @@ repos:
- id: nbqa-ruff
args: [--fix, --exit-non-zero-on-fix]

- repo: local
hooks:
- id: doctest
name: doctest
entry: python3 -m doctest -o NORMALIZE_WHITESPACE
files: "^aieng_template/"
language: system

# - repo: local
# hooks:
# - id: pytest
# name: pytest
# entry: python3 -m pytest -m "not integration_test"
# language: system
# pass_filenames: false
# always_run: true

ci:
autofix_commit_msg: |
[pre-commit.ci] Add auto fixes from pre-commit.com hooks
Expand All @@ -80,5 +54,5 @@ ci:
autoupdate_branch: ''
autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
autoupdate_schedule: weekly
skip: [pytest,doctest,mypy]
skip: [pytest,doctest]
submodules: false
153 changes: 138 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,25 +1,148 @@
# AI Engineering template (with uv)
# Agent Bootcamp

----------------------------------------------------------------------------------------

[![code checks](https://github.com/VectorInstitute/aieng-template-uv/actions/workflows/code_checks.yml/badge.svg)](https://github.com/VectorInstitute/aieng-template-uv/actions/workflows/code_checks.yml)
[![integration tests](https://github.com/VectorInstitute/aieng-template-uv/actions/workflows/integration_tests.yml/badge.svg)](https://github.com/VectorInstitute/aieng-template-uv/actions/workflows/integration_tests.yml)
[![docs](https://github.com/VectorInstitute/aieng-template-uv/actions/workflows/docs.yml/badge.svg)](https://github.com/VectorInstitute/aieng-template-uv/actions/workflows/docs.yml)
[![codecov](https://codecov.io/github/VectorInstitute/aieng-template-uv/graph/badge.svg?token=83MYFZ3UPA)](https://codecov.io/github/VectorInstitute/aieng-template-uv)
![GitHub License](https://img.shields.io/github/license/VectorInstitute/aieng-template-uv)
This is a collection of reference implementations for Vector Institute's **Agent Bootcamp**, taking place between June and September 2025. The repository demonstrates modern agentic workflows for retrieval-augmented generation (RAG), evaluation, and orchestration using the latest Python tools and frameworks.

A template repo for AI Engineering projects (using ``python``) and ``uv``. This
template is like our original AI Engineering [template](https://github.com/VectorInstitute/aieng-template),
however, unlike how that template uses poetry, this one uses uv for dependency
management (as well as packaging and publishing).
## Reference Implementations

## 🧑🏿‍💻 Developing
This repository includes several modules, each showcasing a different aspect of agent-based RAG systems:

### Installing dependencies
**1. Basics: Reason-and-Act RAG**
A minimal Reason-and-Act (ReAct) agent for knowledge retrieval, implemented without any agent framework.

The development environment can be set up using
[uv](https://github.com/astral-sh/uv?tab=readme-ov-file#installation). Hence, make sure it is
installed and then run:
- **[1.0 Search Demo](src/1_basics/0_search_demo/README.md)**
A simple demo showing the capabilities (and limitations) of a knowledge base search.


- **[1.1 ReAct Agent for RAG](src/1_basics/1_react_rag/README.md)**
Basic ReAct agent for step-by-step retrieval and answer generation.

**2. Frameworks: OpenAI Agents SDK**
Showcases the use of the OpenAI agents SDK to reduce boilerplate and improve readability.

- **[2.1 ReAct Agent for RAG - OpenAI SDK](src/2_frameworks/1_react_rag/README.md)**
Implements the same Reason-and-Act agent using the high-level abstractions provided by the OpenAI Agents SDK. This approach reduces boilerplate and improves readability.
This module also introduces Langfuse to make the agent less of a black box.

- **[2.2 Multi-agent Setup for Deep Research](src/2_frameworks/2_multi_agent/README.md)**
Demo of a multi-agent architecture with planner, researcher, and writer agents collaborating on complex queries.

**3. Evals: Automated Evaluation Pipelines**
Contains scripts and utilities for evaluating agent performance using LLM-as-a-judge and synthetic data generation. Includes tools for uploading datasets, running evaluations, and integrating with [Langfuse](https://langfuse.com/) for traceability.

- **[3.1 LLM-as-a-Judge](src/3_evals/1_llm_judge/README.md)**
Automated evaluation pipelines using LLM-as-a-judge with Langfuse integration.

- **[3.2 Evaluation on Synthetic Dataset](src/3_evals/2_synthetic_data/README.md)**
Showcases the generation of synthetic evaluation data for testing agents.


# Reference Implementation Repository for AI Agent Bootcamp

## Setting

Build a knowledge retrieval agent.

## Synopsis

**Basics**: Build a ReAct-styled ("Reason-and-Act") agent without using agent frameworks.

**Frameworks**: Build agents with frameworks to reduce boilerplate code.

**Evals**: Agent observability, test data synthesis, and automated evaluation.

## Tooling

- **openai-agents** as the AI Agent framework.
- **langfuse** for agent observability and evaluation.
- **uv** for dependency management.
- **gradio** for an interactive prototype.
- **weaviate** as the search engine for the local knowledge base.


## Getting Started

Set your API keys in `.env`. Use `.env.example` as a template.

```bash
cp -v .env.example .env
```

Run integration tests to validate that your API keys are set up correctly.

```bash
PYTHONPATH="." uv run pytest -sv tests/tool_tests/test_integration.py
```

## Reference Implementations

### 1. Basics

Interactive knowledge base demo. Access the Gradio interface in your browser (see forwarded ports).

```bash
uv run --env-file .env -m src.1_basics.0_search_demo.gradio
```

Basic Reason-and-Act Agent (command-line version). To exit, press `Control-\`.

```bash
uv run --env-file .env -m src.1_basics.1_react_rag.main
```

Interactive Gradio web version of the Reason-and-Act Agent.

```bash
uv run --env-file .env -m src.1_basics.1_react_rag.gradio
```


### 2. Frameworks

Reason-and-Act Agent without the boilerplate, using the OpenAI Agents SDK.

```bash
uv run --env-file .env -m src.2_frameworks.1_react_rag.basic
uv run --env-file .env -m src.2_frameworks.1_react_rag.gradio
uv run --env-file .env -m src.2_frameworks.1_react_rag.langfuse_gradio
```

Multi-agent examples, also via the OpenAI Agents SDK.

```bash
uv run --env-file .env -m src.2_frameworks.2_multi_agent.gradio
```

### 3. Evals

Generate synthetic evaluation data.

```bash
uv run -m src.3_evals.2_synthetic_data.synthesize_data \
--source_dataset hf://vector-institute/hotpotqa@d997ecf:train \
--langfuse_dataset_name search-dataset-synthetic-20250609 \
--limit 18
```

Run LLM-as-a-judge evaluation on the synthetic data.

```bash
uv run \
--env-file .env \
-m src.3_evals.1_llm_judge.run_eval \
--langfuse_dataset_name search-dataset-synthetic-20250609 \
--run_name enwiki_weaviate \
--limit 18
```

## Requirements

- Python 3.12+

## Getting Started

Clone the repository:

```bash
uv sync
Expand Down
4 changes: 2 additions & 2 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ Build a knowledge retrieval agent.
- **openai-agents** as the AI Agent framework.
- **langfuse** for agent observability and evaluation.
- **uv** for dependency management.
- **streamlit** for an interactive prototype.
- **elasticsearch** as the search engine for the local knowledge base.
- **gradio** for an interactive prototype.
- **weaviate** as the search engine for the local knowledge base.
48 changes: 23 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,58 +5,55 @@ description = "Add your description here"
readme = "README.md"
authors = [ {name = "Vector AI Engineering", email = "ai_engineering@vectorinstitute.ai"}]
license = "Apache-2.0"
repository = "https://github.com/VectorInstitute/aieng-template-uv"
repository = "https://github.com/VectorInstitute/agent-bootcamp"
requires-python = ">=3.12"
dependencies = [
"elasticsearch[async]>=9.0.1",
"gradio>=5.31.0",
"openai>=1.82.0",
"openai-agents>=0.0.16",
"pydantic>=2.11.5",
"beautifulsoup4>=4.13.4",
"datasets>=3.6.0",
"gradio>=5.35.0",
"langfuse>=3.1.3",
"lxml>=6.0.0",
"nest-asyncio>=1.6.0",
"numpy<2.3.0",
"openai>=1.93.1",
"openai-agents>=0.1.0",
"pydantic>=2.11.7",
"pydantic-ai-slim[logfire]>=0.3.7",
"pytest-asyncio>=0.25.2",
"weaviate-client>=4.15.4",
]

[dependency-groups]
dev = [
"codecov>=2.1.13",
"mypy>=1.14.1",
"ipykernel>=6.29.5",
"ipython>=9.4.0",
"ipywidgets>=8.1.7",
"jupyter>=1.1.1",
"jupyterlab>=4.4.2",
"nbqa>=1.9.1",
"pip-audit>=2.7.3",
"pre-commit>=4.1.0",
"pytest>=8.3.4",
"pytest-asyncio>=0.25.2",
"pytest-cov>=6.0.0",
"pytest-mock>=3.14.0",
"ruff>=0.9.2",
"ruff>=0.12.2",
]
docs = [
"jinja2>=3.1.6", # Pinning version to address vulnerability GHSA-cpwx-vrp4-4pq7
"mkdocs>=1.6.0",
"mkdocs-material>=9.5.15",
"mkdocs-material>=9.6.15",
"mkdocstrings>=0.24.1",
"mkdocstrings-python>=1.10.0",
"mkdocstrings-python>=1.16.12",
"ipykernel>=6.29.5",
"ipython>=8.31.0",
"ipython>=9.4.0",
]

# Default dependency groups to be installed
[tool.uv]
default-groups = ["dev", "docs"]

[tool.mypy]
install_types = true
pretty = true
namespace_packages = true
explicit_package_bases = true
warn_unused_configs = true
allow_subclassing_any = false
check_untyped_defs = true
allow_untyped_decorators = false
warn_redundant_casts = true
warn_return_any = true
strict_equality = true


[tool.ruff]
include = ["*.py", "pyproject.toml", "*.ipynb"]
line-length = 88
Expand Down Expand Up @@ -95,6 +92,7 @@ ignore = [
"PLR2004", # Replace magic number with named constant
"PLR0913", # Too many arguments
"COM812", # Missing trailing comma
"N999", # Number in module names.
]

# Ignore import violations in all `__init__.py` files.
Expand Down
10 changes: 10 additions & 0 deletions src/1_basics/0_search_demo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Knowledgebase Demo

This folder contains logic for showcasing the capabilities (and limitations) of the knowledge base.

The output format is similar to what the agent LLM receives as tool output.

```bash
uv run --env-file .env -m src.1_basics.0_search_demo.gradio
```
Loading
Loading