Skip to content

Commit

Permalink
Merge branch 'main' into example-video-captioning-agent-phi-2176
Browse files Browse the repository at this point in the history
  • Loading branch information
Ayush0054 authored Dec 23, 2024
2 parents a6a9445 + b65c7a7 commit 4627de4
Show file tree
Hide file tree
Showing 41 changed files with 1,144 additions and 87 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,6 @@ data.db

.ipynb_checkpoints

audio_generations

*.db
8 changes: 2 additions & 6 deletions cookbook/agents/14_generate_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,5 @@
images = image_agent.get_images()
if images and isinstance(images, list):
for image_response in images:
image_data = image_response.get("data") # type: ignore
if image_data:
for image in image_data:
image_url = image.get("url") # type: ignore
if image_url:
print(image_url)
image_url = image_response.url
print(image_url)
7 changes: 3 additions & 4 deletions cookbook/agents/37_audio_input_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import requests
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.utils.audio import write_audio_to_file

# Fetch the audio file and convert it to a base64 encoded string
url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
Expand All @@ -22,7 +23,5 @@
audio={"data": encoded_string, "format": "wav"},
)

if agent.run_response.audio is not None and "data" in agent.run_response.audio:
wav_bytes = base64.b64decode(agent.run_response.audio["data"])
with open("dog.wav", "wb") as f:
f.write(wav_bytes)
if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/dog.wav")
15 changes: 6 additions & 9 deletions cookbook/agents/38_audio_multi_turn.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
import base64
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.utils.audio import write_audio_to_file

agent = Agent(
model=OpenAIChat(
id="gpt-4o-audio-preview", modalities=["text", "audio"], audio={"voice": "alloy", "format": "wav"}
),
debug_mode=True,
add_history_to_messages=True,
)

agent.run("Is a golden retriever a good family dog?")
if agent.run_response.audio is not None and "data" in agent.run_response.audio:
wav_bytes = base64.b64decode(agent.run_response.audio["data"])
with open("tmp/answer_1.wav", "wb") as f:
f.write(wav_bytes)
if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_1.wav")

agent.run("Why do you say they are loyal?")
if agent.run_response.audio is not None and "data" in agent.run_response.audio:
wav_bytes = base64.b64decode(agent.run_response.audio["data"])
with open("tmp/answer_2.wav", "wb") as f:
f.write(wav_bytes)
if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_2.wav")
8 changes: 3 additions & 5 deletions cookbook/agents/42_image_to_audio.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import base64
from pathlib import Path
from rich import print
from rich.text import Text

from phi.agent import Agent, RunResponse
from phi.model.openai import OpenAIChat
from phi.utils.audio import write_audio_to_file

cwd = Path(__file__).parent.resolve()

Expand All @@ -23,7 +23,5 @@
)

audio_story: RunResponse = audio_agent.run(f"Narrate the story with flair: {image_story.content}")
if audio_story.audio is not None and "data" in audio_story.audio:
wav_bytes = base64.b64decode(audio_story.audio["data"])
with open(cwd.joinpath("tmp/multimodal-agents.wav"), "wb") as f:
f.write(wav_bytes)
if audio_story.response_audio is not None and "data" in audio_story.response_audio:
write_audio_to_file(audio=audio_story.response_audio["data"], filename="tmp/multimodal-agents.wav")
Empty file added cookbook/chunking/__init__.py
Empty file.
16 changes: 16 additions & 0 deletions cookbook/mysql-init/init.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Create 'users' table
CREATE TABLE IF NOT EXISTS users (
id INT AUTO_INCREMENT PRIMARY KEY,
username VARCHAR(50) NOT NULL UNIQUE,
email VARCHAR(100) NOT NULL UNIQUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Create 'products' table
CREATE TABLE IF NOT EXISTS products (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100) NOT NULL,
description TEXT,
price DECIMAL(10,2) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
16 changes: 16 additions & 0 deletions cookbook/playground/gemini_agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from phi.agent import Agent
from phi.tools.yfinance import YFinanceTools
from phi.playground import Playground, serve_playground_app
from phi.model.google import Gemini

finance_agent = Agent(
name="Finance Agent",
model=Gemini(id="gemini-2.0-flash-exp"),
tools=[YFinanceTools(stock_price=True)],
debug_mode=True,
)

app = Playground(agents=[finance_agent]).get_app(use_async=False)

if __name__ == "__main__":
serve_playground_app("gemini_agents:app", reload=True)
30 changes: 29 additions & 1 deletion cookbook/playground/multimodal_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.dalle import Dalle
from phi.tools.eleven_labs_tools import ElevenLabsTools
from phi.tools.giphy import GiphyTools
from phi.tools.models_labs import ModelsLabs
from phi.model.response import FileType
Expand Down Expand Up @@ -88,6 +89,7 @@

gif_agent = Agent(
name="Gif Generator Agent",
agent_id="gif_agent",
model=OpenAIChat(id="gpt-4o"),
tools=[GiphyTools()],
description="You are an AI agent that can generate gifs using Giphy.",
Expand All @@ -102,8 +104,34 @@
storage=SqlAgentStorage(table_name="gif_agent", db_file=image_agent_storage_file),
)

# Playground agent that generates audio through the ElevenLabs toolkit.
# The toolkit is configured with a fixed voice and `audio_generations` as its
# target directory.
audio_agent = Agent(
    name="Audio Generator Agent",
    agent_id="audio_agent",
    model=OpenAIChat(id="gpt-4o"),
    tools=[
        ElevenLabsTools(
            voice_id="JBFqnCBsd6RMkjVDRZzb", model_id="eleven_multilingual_v2", target_directory="audio_generations"
        )
    ],
    description="You are an AI agent that can generate audio using the ElevenLabs API.",
    instructions=[
        "When the user asks you to generate audio, use the `text_to_speech` tool to generate the audio.",
        "You'll generate the appropriate prompt to send to the tool to generate audio.",
        # Every instruction needs its own trailing comma: adjacent string
        # literals are implicitly concatenated, which previously fused this
        # entry and the next one into a single run-on instruction.
        "You don't need to find the appropriate voice first, I already specified the voice to user.",
        "Don't return file name or file url in your response or markdown just tell the audio was created successfully.",
        "The audio should be long and detailed.",
    ],
    markdown=True,
    debug_mode=True,
    add_history_to_messages=True,
    add_datetime_to_instructions=True,
    # Uses the same database file as gif_agent, with a dedicated table.
    storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
)

app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent]).get_app(use_async=False)

app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent]).get_app(
use_async=False
)

if __name__ == "__main__":
serve_playground_app("multimodal_agent:app", reload=True)
12 changes: 12 additions & 0 deletions cookbook/providers/google/flash_thinking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from phi.agent import Agent
from phi.model.google import Gemini

task = (
"Three missionaries and three cannibals need to cross a river. "
"They have a boat that can carry up to two people at a time. "
"If, at any time, the cannibals outnumber the missionaries on either side of the river, the cannibals will eat the missionaries. "
"How can all six people get across the river safely? Provide a step-by-step solution and show the solutions as an ascii diagram"
)

agent = Agent(model=Gemini(id="gemini-2.0-flash-thinking-exp-1219"), markdown=True)
agent.print_response(task, stream=True)
18 changes: 18 additions & 0 deletions cookbook/providers/ollama/agent_set_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Run `pip install yfinance` to install dependencies."""

from ollama import Client as OllamaClient
from phi.agent import Agent, RunResponse # noqa
from phi.model.ollama import Ollama
from phi.playground import Playground, serve_playground_app
from phi.tools.yfinance import YFinanceTools

agent = Agent(
model=Ollama(id="llama3.1:8b", client=OllamaClient()),
tools=[YFinanceTools(stock_price=True)],
markdown=True,
)

app = Playground(agents=[agent]).get_app()

if __name__ == "__main__":
serve_playground_app("agent_set_client:app", reload=True)
8 changes: 8 additions & 0 deletions cookbook/providers/ollama/agent_stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Iterator # noqa
from phi.agent import Agent, RunResponse # noqa
from phi.model.ollama import Ollama
from phi.tools.crawl4ai_tools import Crawl4aiTools
from phi.tools.yfinance import YFinanceTools

agent = Agent(
Expand All @@ -20,3 +21,10 @@

# Print the response in the terminal
agent.print_response("What are analyst recommendations for NVDA and TSLA", stream=True)


agent = Agent(model=Ollama(id="llama3.1:8b"), tools=[Crawl4aiTools(max_length=1000)], show_tool_calls=True)
agent.print_response(
"Summarize me the key points in bullet points of this: https://blog.google/products/gemini/google-gemini-deep-research/",
stream=True,
)
Empty file added cookbook/readers/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions cookbook/run_mysql.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Start a detached MySQL 8.0 container named `mysql`, published on port 3306.
# Run this from the repository root: the init-script mount is resolved
# relative to the current working directory.
docker run -d \
  -e MYSQL_ROOT_PASSWORD=phi \
  -e MYSQL_DATABASE=phi \
  -e MYSQL_USER=phi \
  -e MYSQL_PASSWORD=phi \
  -p 3306:3306 \
  -v mysql_data:/var/lib/mysql \
  -v "$(pwd)/cookbook/mysql-init:/docker-entrypoint-initdb.d" \
  --name mysql \
  mysql:8.0
# The bind-mount argument is quoted so that a working directory containing
# spaces is not word-split into several arguments.
13 changes: 13 additions & 0 deletions cookbook/storage/json_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Run `pip install duckduckgo-search openai` to install dependencies."""

from phi.agent import Agent
from phi.tools.duckduckgo import DuckDuckGo
from phi.storage.agent.json import JsonFileAgentStorage

agent = Agent(
storage=JsonFileAgentStorage(dir_path="tmp/agent_sessions_json"),
tools=[DuckDuckGo()],
add_history_to_messages=True,
)
agent.print_response("How many people live in Canada?")
agent.print_response("What is their national anthem called?")
13 changes: 13 additions & 0 deletions cookbook/storage/yaml_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""Run `pip install duckduckgo-search openai` to install dependencies."""

from phi.agent import Agent
from phi.tools.duckduckgo import DuckDuckGo
from phi.storage.agent.yaml import YamlFileAgentStorage

agent = Agent(
storage=YamlFileAgentStorage(dir_path="tmp/agent_sessions_yaml"),
tools=[DuckDuckGo()],
add_history_to_messages=True,
)
agent.print_response("How many people live in Canada?")
agent.print_response("What is their national anthem called?")
1 change: 0 additions & 1 deletion cookbook/tools/composio_tools.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from phi.agent import Agent
from composio_phidata import Action, ComposioToolSet # type: ignore


toolset = ComposioToolSet()
composio_tools = toolset.get_tools(actions=[Action.GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER])

Expand Down
22 changes: 22 additions & 0 deletions cookbook/tools/confluence_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from phi.agent import Agent
from phi.tools.confluence import ConfluenceTools


agent = Agent(
name="Confluence agent",
tools=[ConfluenceTools()],
show_tool_calls=True,
markdown=True,
)

## getting space details
agent.print_response("How many spaces are there and what are their names?")

## getting page_content
agent.print_response("What is the content present in page 'Large language model in LLM space'")

## getting page details in a particular space
agent.print_response("Can you extract all the page names from 'LLM' space")

## creating a new page in a space
agent.print_response("Can you create a new page named 'TESTING' in 'LLM' space")
32 changes: 32 additions & 0 deletions cookbook/tools/elevenlabs_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
pip install elevenlabs
"""

from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.eleven_labs_tools import ElevenLabsTools

# Example agent that generates audio through the ElevenLabs toolkit.
# The toolkit is configured with a fixed voice and `audio_generations` as its
# target directory.
audio_agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    tools=[
        ElevenLabsTools(
            voice_id="21m00Tcm4TlvDq8ikWAM", model_id="eleven_multilingual_v2", target_directory="audio_generations"
        )
    ],
    description="You are an AI agent that can generate audio using the ElevenLabs API.",
    instructions=[
        # NOTE(review): confirm `generate_audio` matches a tool that
        # ElevenLabsTools actually registers; cookbook/playground/multimodal_agent.py
        # refers to the tool as `text_to_speech`.
        "When the user asks you to generate audio, use the `generate_audio` tool to generate the audio.",
        "You'll generate the appropriate prompt to send to the tool to generate audio.",
        # Every instruction needs its own trailing comma: adjacent string
        # literals are implicitly concatenated, which previously fused this
        # entry and the next one into a single run-on instruction.
        "You don't need to find the appropriate voice first, I already specified the voice to user.",
        "Return the audio file name in your response. Don't convert it to markdown.",
        "The audio should be long and detailed.",
    ],
    markdown=True,
    debug_mode=True,
    show_tool_calls=True,
)

audio_agent.print_response("Generate a very long audio of history of french revolution")


audio_agent.print_response("Generate a kick sound effect")
Loading

0 comments on commit 4627de4

Please sign in to comment.