Merge branch 'main' into example-video-captioning-agent-phi-2176

phidatahq · Dec 23, 2024 · 4627de4 · 4627de4
2 parents a6a9445 + b65c7a7
commit 4627de4
Show file tree

Hide file tree

Showing 41 changed files with 1,144 additions and 87 deletions.
diff --git a/.gitignore b/.gitignore
@@ -48,4 +48,6 @@ data.db
 
 .ipynb_checkpoints
 
+audio_generations
+
 *.db
diff --git a/cookbook/agents/14_generate_image.py b/cookbook/agents/14_generate_image.py
@@ -16,9 +16,5 @@
 images = image_agent.get_images()
 if images and isinstance(images, list):
     for image_response in images:
-        image_data = image_response.get("data")  # type: ignore
-        if image_data:
-            for image in image_data:
-                image_url = image.get("url")  # type: ignore
-                if image_url:
-                    print(image_url)
+        image_url = image_response.url
+        print(image_url)
diff --git a/cookbook/agents/37_audio_input_output.py b/cookbook/agents/37_audio_input_output.py
@@ -2,6 +2,7 @@
 import requests
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
+from phi.utils.audio import write_audio_to_file
 
 # Fetch the audio file and convert it to a base64 encoded string
 url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
@@ -22,7 +23,5 @@
     audio={"data": encoded_string, "format": "wav"},
 )
 
-if agent.run_response.audio is not None and "data" in agent.run_response.audio:
-    wav_bytes = base64.b64decode(agent.run_response.audio["data"])
-    with open("dog.wav", "wb") as f:
-        f.write(wav_bytes)
+if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
+    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/dog.wav")
diff --git a/cookbook/agents/38_audio_multi_turn.py b/cookbook/agents/38_audio_multi_turn.py
@@ -1,22 +1,19 @@
-import base64
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
+from phi.utils.audio import write_audio_to_file
 
 agent = Agent(
     model=OpenAIChat(
         id="gpt-4o-audio-preview", modalities=["text", "audio"], audio={"voice": "alloy", "format": "wav"}
     ),
+    debug_mode=True,
     add_history_to_messages=True,
 )
 
 agent.run("Is a golden retriever a good family dog?")
-if agent.run_response.audio is not None and "data" in agent.run_response.audio:
-    wav_bytes = base64.b64decode(agent.run_response.audio["data"])
-    with open("tmp/answer_1.wav", "wb") as f:
-        f.write(wav_bytes)
+if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
+    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_1.wav")
 
 agent.run("Why do you say they are loyal?")
-if agent.run_response.audio is not None and "data" in agent.run_response.audio:
-    wav_bytes = base64.b64decode(agent.run_response.audio["data"])
-    with open("tmp/answer_2.wav", "wb") as f:
-        f.write(wav_bytes)
+if agent.run_response.response_audio is not None and "data" in agent.run_response.response_audio:
+    write_audio_to_file(audio=agent.run_response.response_audio["data"], filename="tmp/answer_2.wav")
diff --git a/cookbook/agents/42_image_to_audio.py b/cookbook/agents/42_image_to_audio.py
@@ -1,10 +1,10 @@
-import base64
 from pathlib import Path
 from rich import print
 from rich.text import Text
 
 from phi.agent import Agent, RunResponse
 from phi.model.openai import OpenAIChat
+from phi.utils.audio import write_audio_to_file
 
 cwd = Path(__file__).parent.resolve()
 
@@ -23,7 +23,5 @@
 )
 
 audio_story: RunResponse = audio_agent.run(f"Narrate the story with flair: {image_story.content}")
-if audio_story.audio is not None and "data" in audio_story.audio:
-    wav_bytes = base64.b64decode(audio_story.audio["data"])
-    with open(cwd.joinpath("tmp/multimodal-agents.wav"), "wb") as f:
-        f.write(wav_bytes)
+if audio_story.response_audio is not None and "data" in audio_story.response_audio:
+    write_audio_to_file(audio=audio_story.response_audio["data"], filename=str(cwd.joinpath("tmp/multimodal-agents.wav")))
diff --git a/cookbook/chunking/__init__.py b/cookbook/chunking/__init__.py
diff --git a/cookbook/mysql-init/init.sql b/cookbook/mysql-init/init.sql
@@ -0,0 +1,16 @@
+-- Create 'users' table
+CREATE TABLE IF NOT EXISTS users (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    username VARCHAR(50) NOT NULL UNIQUE,
+    email VARCHAR(100) NOT NULL UNIQUE,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Create 'products' table
+CREATE TABLE IF NOT EXISTS products (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    name VARCHAR(100) NOT NULL,
+    description TEXT,
+    price DECIMAL(10,2) NOT NULL,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
diff --git a/cookbook/playground/gemini_agents.py b/cookbook/playground/gemini_agents.py
@@ -0,0 +1,16 @@
+from phi.agent import Agent
+from phi.tools.yfinance import YFinanceTools
+from phi.playground import Playground, serve_playground_app
+from phi.model.google import Gemini
+
+finance_agent = Agent(
+    name="Finance Agent",
+    model=Gemini(id="gemini-2.0-flash-exp"),
+    tools=[YFinanceTools(stock_price=True)],
+    debug_mode=True,
+)
+
+app = Playground(agents=[finance_agent]).get_app(use_async=False)
+
+if __name__ == "__main__":
+    serve_playground_app("gemini_agents:app", reload=True)
diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py
@@ -9,6 +9,7 @@
 from phi.agent import Agent
 from phi.model.openai import OpenAIChat
 from phi.tools.dalle import Dalle
+from phi.tools.eleven_labs_tools import ElevenLabsTools
 from phi.tools.giphy import GiphyTools
 from phi.tools.models_labs import ModelsLabs
 from phi.model.response import FileType
@@ -88,6 +89,7 @@
 
 gif_agent = Agent(
     name="Gif Generator Agent",
+    agent_id="gif_agent",
     model=OpenAIChat(id="gpt-4o"),
     tools=[GiphyTools()],
     description="You are an AI agent that can generate gifs using Giphy.",
@@ -102,8 +104,34 @@
     storage=SqlAgentStorage(table_name="gif_agent", db_file=image_agent_storage_file),
 )
 
+audio_agent = Agent(  # playground agent that turns text into speech via ElevenLabsTools
+    name="Audio Generator Agent",
+    agent_id="audio_agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[
+        ElevenLabsTools(
+            voice_id="JBFqnCBsd6RMkjVDRZzb", model_id="eleven_multilingual_v2", target_directory="audio_generations"
+        )
+    ],
+    description="You are an AI agent that can generate audio using the ElevenLabs API.",
+    instructions=[  # one list entry per instruction; adjacent literals without a comma silently merge
+        "When the user asks you to generate audio, use the `text_to_speech` tool to generate the audio.",
+        "You'll generate the appropriate prompt to send to the tool to generate audio.",
+        "You don't need to find the appropriate voice first, I already specified the voice to user.",
+        "Don't return file name or file url in your response or markdown just tell the audio was created successfully.",
+        "The audio should be long and detailed.",
+    ],
+    markdown=True,
+    debug_mode=True,
+    add_history_to_messages=True,
+    add_datetime_to_instructions=True,
+    storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
+)
 
-app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent]).get_app(use_async=False)
+
+app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent]).get_app(
+    use_async=False
+)
 
 if __name__ == "__main__":
     serve_playground_app("multimodal_agent:app", reload=True)
diff --git a/cookbook/providers/google/flash_thinking.py b/cookbook/providers/google/flash_thinking.py
@@ -0,0 +1,12 @@
+from phi.agent import Agent
+from phi.model.google import Gemini
+
+task = (
+    "Three missionaries and three cannibals need to cross a river. "
+    "They have a boat that can carry up to two people at a time. "
+    "If, at any time, the cannibals outnumber the missionaries on either side of the river, the cannibals will eat the missionaries. "
+    "How can all six people get across the river safely? Provide a step-by-step solution and show the solutions as an ascii diagram"
+)
+
+agent = Agent(model=Gemini(id="gemini-2.0-flash-thinking-exp-1219"), markdown=True)
+agent.print_response(task, stream=True)
diff --git a/cookbook/providers/ollama/agent_set_client.py b/cookbook/providers/ollama/agent_set_client.py
@@ -0,0 +1,18 @@
+"""Run `pip install yfinance` to install dependencies."""
+
+from ollama import Client as OllamaClient
+from phi.agent import Agent, RunResponse  # noqa
+from phi.model.ollama import Ollama
+from phi.playground import Playground, serve_playground_app
+from phi.tools.yfinance import YFinanceTools
+
+agent = Agent(
+    model=Ollama(id="llama3.1:8b", client=OllamaClient()),
+    tools=[YFinanceTools(stock_price=True)],
+    markdown=True,
+)
+
+app = Playground(agents=[agent]).get_app()
+
+if __name__ == "__main__":
+    serve_playground_app("agent_set_client:app", reload=True)
diff --git a/cookbook/providers/ollama/agent_stream.py b/cookbook/providers/ollama/agent_stream.py
@@ -3,6 +3,7 @@
 from typing import Iterator  # noqa
 from phi.agent import Agent, RunResponse  # noqa
 from phi.model.ollama import Ollama
+from phi.tools.crawl4ai_tools import Crawl4aiTools
 from phi.tools.yfinance import YFinanceTools
 
 agent = Agent(
@@ -20,3 +21,10 @@
 
 # Print the response in the terminal
 agent.print_response("What are analyst recommendations for NVDA and TSLA", stream=True)
+
+
+agent = Agent(model=Ollama(id="llama3.1:8b"), tools=[Crawl4aiTools(max_length=1000)], show_tool_calls=True)
+agent.print_response(
+    "Summarize me the key points in bullet points of this: https://blog.google/products/gemini/google-gemini-deep-research/",
+    stream=True,
+)
diff --git a/cookbook/readers/__init__.py b/cookbook/readers/__init__.py
diff --git a/cookbook/run_mysql.sh b/cookbook/run_mysql.sh
@@ -0,0 +1,10 @@
+docker run -d \
+  -e MYSQL_ROOT_PASSWORD=phi \
+  -e MYSQL_DATABASE=phi \
+  -e MYSQL_USER=phi \
+  -e MYSQL_PASSWORD=phi \
+  -p 3306:3306 \
+  -v mysql_data:/var/lib/mysql \
+  -v "$(pwd)/cookbook/mysql-init":/docker-entrypoint-initdb.d \
+  --name mysql \
+  mysql:8.0
diff --git a/cookbook/storage/json_storage.py b/cookbook/storage/json_storage.py
@@ -0,0 +1,13 @@
+"""Run `pip install duckduckgo-search openai` to install dependencies."""
+
+from phi.agent import Agent
+from phi.tools.duckduckgo import DuckDuckGo
+from phi.storage.agent.json import JsonFileAgentStorage
+
+agent = Agent(
+    storage=JsonFileAgentStorage(dir_path="tmp/agent_sessions_json"),
+    tools=[DuckDuckGo()],
+    add_history_to_messages=True,
+)
+agent.print_response("How many people live in Canada?")
+agent.print_response("What is their national anthem called?")
diff --git a/cookbook/storage/yaml_storage.py b/cookbook/storage/yaml_storage.py
@@ -0,0 +1,13 @@
+"""Run `pip install duckduckgo-search openai` to install dependencies."""
+
+from phi.agent import Agent
+from phi.tools.duckduckgo import DuckDuckGo
+from phi.storage.agent.yaml import YamlFileAgentStorage
+
+agent = Agent(
+    storage=YamlFileAgentStorage(dir_path="tmp/agent_sessions_yaml"),
+    tools=[DuckDuckGo()],
+    add_history_to_messages=True,
+)
+agent.print_response("How many people live in Canada?")
+agent.print_response("What is their national anthem called?")
diff --git a/cookbook/tools/composio_tools.py b/cookbook/tools/composio_tools.py
@@ -1,7 +1,6 @@
 from phi.agent import Agent
 from composio_phidata import Action, ComposioToolSet  # type: ignore
 
-
 toolset = ComposioToolSet()
 composio_tools = toolset.get_tools(actions=[Action.GITHUB_STAR_A_REPOSITORY_FOR_THE_AUTHENTICATED_USER])
 

diff --git a/cookbook/tools/confluence_tools.py b/cookbook/tools/confluence_tools.py
@@ -0,0 +1,22 @@
+from phi.agent import Agent
+from phi.tools.confluence import ConfluenceTools
+
+
+agent = Agent(
+    name="Confluence agent",
+    tools=[ConfluenceTools()],
+    show_tool_calls=True,
+    markdown=True,
+)
+
+## getting space details
+agent.print_response("How many spaces are there and what are their names?")
+
+## getting page_content
+agent.print_response("What is the content present in page 'Large language model in LLM space'")
+
+## getting page details in a particular space
+agent.print_response("Can you extract all the page names from 'LLM' space")
+
+## creating a new page in a space
+agent.print_response("Can you create a new page named 'TESTING' in 'LLM' space")
diff --git a/cookbook/tools/elevenlabs_tools.py b/cookbook/tools/elevenlabs_tools.py
@@ -0,0 +1,32 @@
+"""
+pip install elevenlabs
+"""
+
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.tools.eleven_labs_tools import ElevenLabsTools
+
+audio_agent = Agent(  # cookbook agent that generates audio files via ElevenLabsTools
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[
+        ElevenLabsTools(
+            voice_id="21m00Tcm4TlvDq8ikWAM", model_id="eleven_multilingual_v2", target_directory="audio_generations"
+        )
+    ],
+    description="You are an AI agent that can generate audio using the ElevenLabs API.",
+    instructions=[  # one list entry per instruction; adjacent literals without a comma silently merge
+        "When the user asks you to generate audio, use the `text_to_speech` tool to generate the audio.",
+        "You'll generate the appropriate prompt to send to the tool to generate audio.",
+        "You don't need to find the appropriate voice first, I already specified the voice to user.",
+        "Return the audio file name in your response. Don't convert it to markdown.",
+        "The audio should be long and detailed.",
+    ],
+    markdown=True,
+    debug_mode=True,
+    show_tool_calls=True,
+)
+
+audio_agent.print_response("Generate a very long audio of history of french revolution")
+
+
+audio_agent.print_response("Generate a kick sound effect")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -48,4 +48,6 @@ data.db

		.ipynb_checkpoints

		audio_generations

		*.db