Merge pull request #1532 from phidatahq/lumalabs-video-generation
Add LumaLabs Video Generation
dirkbrnd authored Dec 11, 2024
2 parents 6a60da1 + d10bc9c commit 51e8ac0
Showing 5 changed files with 223 additions and 7 deletions.
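At a glance, the new toolkit plugs into an agent like any other phi tool. Here is a minimal sketch, condensed from the cookbook example added below; it assumes the `lumaai` package is installed and `LUMAAI_API_KEY` is set:

from phi.agent import Agent
from phi.llm.openai import OpenAIChat
from phi.tools.lumalab import LumaLabTools

# Minimal wiring; see cookbook/tools/lumalabs_tool.py below for the full version.
luma_agent = Agent(
    llm=OpenAIChat(model="gpt-4o"),
    tools=[LumaLabTools()],  # exposes generate_video and image_to_video
    show_tool_calls=True,
)
luma_agent.run("Generate a video of a car in a sky")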
9 changes: 4 additions & 5 deletions cookbook/playground/multimodal_agent.py
@@ -14,7 +14,6 @@
 from phi.playground import Playground, serve_playground_app
 from phi.storage.agent.sqlite import SqlAgentStorage
 from phi.tools.fal_tools import FalTools
-from pydantic import BaseModel, Field

image_agent_storage_file: str = "tmp/image_agent.db"

@@ -26,7 +25,7 @@
     description="You are an AI agent that can generate images using DALL-E.",
     instructions=[
         "When the user asks you to create an image, use the `create_image` tool to create the image.",
-        "Don't provide the URL of the image in the response. Only describe what image was generated."
+        "Don't provide the URL of the image in the response. Only describe what image was generated.",
     ],
     markdown=True,
     debug_mode=True,
@@ -43,7 +42,7 @@
     description="You are an AI agent that can generate gifs using the ModelsLabs API.",
     instructions=[
         "When the user asks you to create an image, use the `generate_media` tool to create the image.",
-        "Don't provide the URL of the image in the response. Only describe what image was generated."
+        "Don't provide the URL of the image in the response. Only describe what image was generated.",
     ],
     markdown=True,
     debug_mode=True,
@@ -60,7 +59,7 @@
     description="You are an AI agent that can generate videos using the ModelsLabs API.",
     instructions=[
         "When the user asks you to create a video, use the `generate_media` tool to create the video.",
-        "Don't provide the URL of the video in the response. Only describe what video was generated."
+        "Don't provide the URL of the video in the response. Only describe what video was generated.",
     ],
     markdown=True,
     debug_mode=True,
@@ -77,7 +76,7 @@
     description="You are an AI agent that can generate videos using the Fal API.",
     instructions=[
         "When the user asks you to create a video, use the `generate_media` tool to create the video.",
-        "Don't provide the URL of the video in the response. Only describe what video was generated."
+        "Don't provide the URL of the video in the response. Only describe what video was generated.",
     ],
     markdown=True,
     debug_mode=True,
45 changes: 45 additions & 0 deletions cookbook/tools/lumalabs_tool.py
@@ -0,0 +1,45 @@
from phi.agent import Agent
from phi.llm.openai import OpenAIChat
from phi.tools.lumalab import LumaLabTools

"""Create an agent specialized for Luma AI video generation"""

luma_agent = Agent(
name="Luma Video Agent",
agent_id="luma-video-agent",
llm=OpenAIChat(model="gpt-4o"),
tools=[LumaLabTools()], # Using the LumaLab tool we created
markdown=True,
debug_mode=True,
show_tool_calls=True,
instructions=[
"You are an agent designed to generate videos using the Luma AI API.",
"You can generate videos in two ways:",
"1. Text-to-Video Generation:",
" - Use the generate_video function for creating videos from text prompts",
" - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None",
"2. Image-to-Video Generation:",
" - Use the image_to_video function when starting from one or two images",
" - Required parameters: prompt, start_image_url",
" - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'",
" - The image URLs must be publicly accessible",
"Choose the appropriate function based on whether the user provides image URLs or just a text prompt.",
"The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
"Politely and courteously let the user know that the video has been generated and will be displayed below as soon as its ready.",
"After generating any video, if generation is async (wait_for_completion=False), inform about the generation ID",
],
system_message=(
"Use generate_video for text-to-video requests and image_to_video for image-based "
"generation. Don't modify default parameters unless specifically requested. "
"Always provide clear feedback about the video generation status."
),
)

luma_agent.run("Generate a video of a car in a sky")
# luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg")
# luma_agent.run("""
# Create a transition video between these two images:
# Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380
# End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380
# Make it a smooth, natural movement
# """)
4 changes: 3 additions & 1 deletion phi/llm/openai/chat.py
@@ -181,7 +181,9 @@ def to_dict(self) -> Dict[str, Any]:
         if self.presence_penalty:
             _dict["presence_penalty"] = self.presence_penalty
         if self.response_format:
-            _dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+            _dict["response_format"] = (
+                self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+            )
         if self.seed is not None:
             _dict["seed"] = self.seed
         if self.stop:
4 changes: 3 additions & 1 deletion phi/model/openai/chat.py
@@ -255,7 +255,9 @@ def to_dict(self) -> Dict[str, Any]:
         if self.presence_penalty is not None:
             model_dict["presence_penalty"] = self.presence_penalty
         if self.response_format is not None:
-            model_dict["response_format"] = self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+            model_dict["response_format"] = (
+                self.response_format if isinstance(self.response_format, dict) else str(self.response_format)
+            )
         if self.seed is not None:
             model_dict["seed"] = self.seed
         if self.stop is not None:
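Both chat.py hunks are formatting-only: the response_format serialization is wrapped across lines without changing behavior. The branch they reformat passes a dict through as-is and stringifies anything else; a quick illustration (assuming the field is settable via the constructor, as with phi's other pydantic-based models):

from phi.llm.openai import OpenAIChat

# A dict response_format is passed through unchanged by to_dict();
# any non-dict value would be stringified instead.
llm = OpenAIChat(model="gpt-4o", response_format={"type": "json_object"})
print(llm.to_dict()["response_format"])  # {'type': 'json_object'}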
168 changes: 168 additions & 0 deletions phi/tools/lumalab.py
@@ -0,0 +1,168 @@
import time
import uuid
from os import getenv
from typing import Optional, Dict, Any, Literal, TypedDict

from phi.agent import Agent
from phi.tools import Toolkit
from phi.utils.log import logger
from phi.model.content import Video

try:
from lumaai import LumaAI # type: ignore
except ImportError:
raise ImportError("`lumaai` not installed. Please install using `pip install lumaai`")


# Define types for keyframe structure
class KeyframeImage(TypedDict):
type: Literal["image"]
url: str


Keyframes = Dict[str, KeyframeImage]


class LumaLabTools(Toolkit):
    def __init__(
        self,
        api_key: Optional[str] = None,
        wait_for_completion: bool = True,
        poll_interval: int = 3,
        max_wait_time: int = 300,  # 5 minutes
    ):
        super().__init__(name="luma_lab")

        self.wait_for_completion = wait_for_completion
        self.poll_interval = poll_interval
        self.max_wait_time = max_wait_time
        self.api_key = api_key or getenv("LUMAAI_API_KEY")

        if not self.api_key:
            logger.error("LUMAAI_API_KEY not set. Please set the LUMAAI_API_KEY environment variable.")

        self.client = LumaAI(auth_token=self.api_key)
        self.register(self.generate_video)
        self.register(self.image_to_video)

    def image_to_video(
        self,
        agent: Agent,
        prompt: str,
        start_image_url: str,
        end_image_url: Optional[str] = None,
        loop: bool = False,
        aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
    ) -> str:
        """Generate a video from one or two images with a prompt.

        Args:
            agent: The agent instance
            prompt: Text description of the desired video
            start_image_url: URL of the starting image
            end_image_url: Optional URL of the ending image
            loop: Whether the video should loop
            aspect_ratio: Aspect ratio of the output video

        Returns:
            str: Status message or error
        """

        try:
            # Construct keyframes
            keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": start_image_url}}

            # Add end image if provided
            if end_image_url:
                keyframes["frame1"] = {"type": "image", "url": end_image_url}

            # Create generation with keyframes
            generation = self.client.generations.create(
                prompt=prompt,
                loop=loop,
                aspect_ratio=aspect_ratio,
                keyframes=keyframes,  # type: ignore
            )

            video_id = str(uuid.uuid4())

            if not self.wait_for_completion:
                return "Async generation unsupported"

            # Poll for completion
            seconds_waited = 0
            while seconds_waited < self.max_wait_time:
                if not generation or not generation.id:
                    return "Failed to get generation ID"

                generation = self.client.generations.get(generation.id)

                if generation.state == "completed" and generation.assets:
                    video_url = generation.assets.video
                    if video_url:
                        agent.add_video(Video(id=video_id, url=video_url, eta="completed"))
                        return f"Video generated successfully: {video_url}"
                elif generation.state == "failed":
                    return f"Generation failed: {generation.failure_reason}"

                logger.info(f"Generation in progress... State: {generation.state}")
                time.sleep(self.poll_interval)
                seconds_waited += self.poll_interval

            return f"Video generation timed out after {self.max_wait_time} seconds"

        except Exception as e:
            logger.error(f"Failed to generate video: {e}")
            return f"Error: {e}"

    def generate_video(
        self,
        agent: Agent,
        prompt: str,
        loop: bool = False,
        aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
        keyframes: Optional[Dict[str, Dict[str, str]]] = None,
    ) -> str:
        """Use this function to generate a video given a prompt."""

        try:
            generation_params: Dict[str, Any] = {
                "prompt": prompt,
                "loop": loop,
                "aspect_ratio": aspect_ratio,
            }

            if keyframes is not None:
                generation_params["keyframes"] = keyframes

            generation = self.client.generations.create(**generation_params)  # type: ignore

            video_id = str(uuid.uuid4())
            if not self.wait_for_completion:
                return "Async generation unsupported"

            # Poll for completion
            seconds_waited = 0
            while seconds_waited < self.max_wait_time:
                if not generation or not generation.id:
                    return "Failed to get generation ID"

                generation = self.client.generations.get(generation.id)

                if generation.state == "completed" and generation.assets:
                    video_url = generation.assets.video
                    if video_url:
                        agent.add_video(Video(id=video_id, url=video_url, state="completed"))
                        return f"Video generated successfully: {video_url}"
                elif generation.state == "failed":
                    return f"Generation failed: {generation.failure_reason}"

                logger.info(f"Generation in progress... State: {generation.state}")
                time.sleep(self.poll_interval)
                seconds_waited += self.poll_interval

            return f"Video generation timed out after {self.max_wait_time} seconds"

        except Exception as e:
            logger.error(f"Failed to generate video: {e}")
            return f"Error: {e}"
