Merge pull request #1532 from phidatahq/lumalabs-video-generation
Add LumaLabs Video Generation
Showing 5 changed files with 223 additions and 7 deletions.
@@ -0,0 +1,45 @@
from phi.agent import Agent
from phi.llm.openai import OpenAIChat
from phi.tools.lumalab import LumaLabTools

"""Create an agent specialized for Luma AI video generation"""

luma_agent = Agent(
    name="Luma Video Agent",
    agent_id="luma-video-agent",
    llm=OpenAIChat(model="gpt-4o"),
    tools=[LumaLabTools()],  # Using the LumaLab tool we created
    markdown=True,
    debug_mode=True,
    show_tool_calls=True,
    instructions=[
        "You are an agent designed to generate videos using the Luma AI API.",
        "You can generate videos in two ways:",
        "1. Text-to-Video Generation:",
        "   - Use the generate_video function for creating videos from text prompts",
        "   - Default parameters: loop=False, aspect_ratio='16:9', keyframes=None",
        "2. Image-to-Video Generation:",
        "   - Use the image_to_video function when starting from one or two images",
        "   - Required parameters: prompt, start_image_url",
        "   - Optional parameters: end_image_url, loop=False, aspect_ratio='16:9'",
        "   - The image URLs must be publicly accessible",
        "Choose the appropriate function based on whether the user provides image URLs or just a text prompt.",
        "The video will be displayed in the UI automatically below your response, so you don't need to show the video URL in your response.",
        "Politely and courteously let the user know that the video has been generated and will be displayed below as soon as it's ready.",
        "After generating any video, if generation is async (wait_for_completion=False), inform the user of the generation ID.",
    ],
    system_message=(
        "Use generate_video for text-to-video requests and image_to_video for image-based "
        "generation. Don't modify default parameters unless specifically requested. "
        "Always provide clear feedback about the video generation status."
    ),
)

luma_agent.run("Generate a video of a car in the sky")
# luma_agent.run("Transform this image into a video of a tiger walking: https://upload.wikimedia.org/wikipedia/commons/thumb/3/3f/Walking_tiger_female.jpg/1920px-Walking_tiger_female.jpg")
# luma_agent.run("""
# Create a transition video between these two images:
# Start: https://img.freepik.com/premium-photo/car-driving-dark-forest-generative-ai_634053-6661.jpg?w=1380
# End: https://img.freepik.com/free-photo/front-view-black-luxury-sedan-road_114579-5030.jpg?t=st=1733821884~exp=1733825484~hmac=735ca584a9b985c53875fc1ad343c3fd394e1de4db49e5ab1a9ab37ac5f91a36&w=1380
# Make it a smooth, natural movement
# """)
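The agent above uses LumaLabTools with its default polling behaviour. As a point of reference, here is a minimal sketch of the same setup with explicit polling settings, using the wait_for_completion, poll_interval and max_wait_time parameters exposed by the toolkit's constructor below; the chosen values and the prompt are illustrative, not part of the cookbook.

from phi.agent import Agent
from phi.llm.openai import OpenAIChat
from phi.tools.lumalab import LumaLabTools

# Poll the Luma API every 5 seconds and give up after 10 minutes.
patient_luma_agent = Agent(
    name="Luma Video Agent (custom polling)",
    llm=OpenAIChat(model="gpt-4o"),
    tools=[LumaLabTools(wait_for_completion=True, poll_interval=5, max_wait_time=600)],
    markdown=True,
    show_tool_calls=True,
)

patient_luma_agent.run("Generate a video of waves rolling onto a beach at sunset")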
@@ -0,0 +1,168 @@
import time
import uuid
from os import getenv
from typing import Optional, Dict, Any, Literal, TypedDict

from phi.agent import Agent
from phi.tools import Toolkit
from phi.utils.log import logger
from phi.model.content import Video

try:
    from lumaai import LumaAI  # type: ignore
except ImportError:
    raise ImportError("`lumaai` not installed. Please install using `pip install lumaai`")


# Define types for keyframe structure
class KeyframeImage(TypedDict):
    type: Literal["image"]
    url: str


Keyframes = Dict[str, KeyframeImage]
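# Example of the keyframes mapping that image_to_video builds and that
# generate_video accepts via its `keyframes` parameter (URLs are illustrative):
# {
#     "frame0": {"type": "image", "url": "https://example.com/start.jpg"},
#     "frame1": {"type": "image", "url": "https://example.com/end.jpg"},
# }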


class LumaLabTools(Toolkit):
    def __init__(
        self,
        api_key: Optional[str] = None,
        wait_for_completion: bool = True,
        poll_interval: int = 3,
        max_wait_time: int = 300,  # 5 minutes
    ):
        super().__init__(name="luma_lab")

        self.wait_for_completion = wait_for_completion
        self.poll_interval = poll_interval
        self.max_wait_time = max_wait_time
        self.api_key = api_key or getenv("LUMAAI_API_KEY")

        if not self.api_key:
            logger.error("LUMAAI_API_KEY not set. Please set the LUMAAI_API_KEY environment variable.")

        self.client = LumaAI(auth_token=self.api_key)
        self.register(self.generate_video)
        self.register(self.image_to_video)

    def image_to_video(
        self,
        agent: Agent,
        prompt: str,
        start_image_url: str,
        end_image_url: Optional[str] = None,
        loop: bool = False,
        aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
    ) -> str:
        """Generate a video from one or two images with a prompt.

        Args:
            agent: The agent instance
            prompt: Text description of the desired video
            start_image_url: URL of the starting image
            end_image_url: Optional URL of the ending image
            loop: Whether the video should loop
            aspect_ratio: Aspect ratio of the output video

        Returns:
            str: Status message or error
        """
        try:
            # Construct keyframes
            keyframes: Dict[str, Dict[str, str]] = {"frame0": {"type": "image", "url": start_image_url}}

            # Add end image if provided
            if end_image_url:
                keyframes["frame1"] = {"type": "image", "url": end_image_url}

            # Create generation with keyframes
            generation = self.client.generations.create(
                prompt=prompt,
                loop=loop,
                aspect_ratio=aspect_ratio,
                keyframes=keyframes,  # type: ignore
            )

            video_id = str(uuid.uuid4())

            if not self.wait_for_completion:
                return "Async generation unsupported"

            # Poll for completion
            seconds_waited = 0
            while seconds_waited < self.max_wait_time:
                if not generation or not generation.id:
                    return "Failed to get generation ID"

                generation = self.client.generations.get(generation.id)

                if generation.state == "completed" and generation.assets:
                    video_url = generation.assets.video
                    if video_url:
                        agent.add_video(Video(id=video_id, url=video_url, eta="completed"))
                        return f"Video generated successfully: {video_url}"
                elif generation.state == "failed":
                    return f"Generation failed: {generation.failure_reason}"

                logger.info(f"Generation in progress... State: {generation.state}")
                time.sleep(self.poll_interval)
                seconds_waited += self.poll_interval

            return f"Video generation timed out after {self.max_wait_time} seconds"

        except Exception as e:
            logger.error(f"Failed to generate video: {e}")
            return f"Error: {e}"

    def generate_video(
        self,
        agent: Agent,
        prompt: str,
        loop: bool = False,
        aspect_ratio: Literal["1:1", "16:9", "9:16", "4:3", "3:4", "21:9", "9:21"] = "16:9",
        keyframes: Optional[Dict[str, Dict[str, str]]] = None,
    ) -> str:
        """Use this function to generate a video given a prompt.

        Args:
            agent: The agent instance
            prompt: Text description of the desired video
            loop: Whether the video should loop
            aspect_ratio: Aspect ratio of the output video
            keyframes: Optional keyframe images to guide the generation

        Returns:
            str: Status message or error
        """
        try:
            generation_params: Dict[str, Any] = {
                "prompt": prompt,
                "loop": loop,
                "aspect_ratio": aspect_ratio,
            }

            if keyframes is not None:
                generation_params["keyframes"] = keyframes

            generation = self.client.generations.create(**generation_params)  # type: ignore

            video_id = str(uuid.uuid4())
            if not self.wait_for_completion:
                return "Async generation unsupported"

            # Poll for completion
            seconds_waited = 0
            while seconds_waited < self.max_wait_time:
                if not generation or not generation.id:
                    return "Failed to get generation ID"

                generation = self.client.generations.get(generation.id)

                if generation.state == "completed" and generation.assets:
                    video_url = generation.assets.video
                    if video_url:
                        agent.add_video(Video(id=video_id, url=video_url, eta="completed"))
                        return f"Video generated successfully: {video_url}"
                elif generation.state == "failed":
                    return f"Generation failed: {generation.failure_reason}"

                logger.info(f"Generation in progress... State: {generation.state}")
                time.sleep(self.poll_interval)
                seconds_waited += self.poll_interval

            return f"Video generation timed out after {self.max_wait_time} seconds"

        except Exception as e:
            logger.error(f"Failed to generate video: {e}")
            return f"Error: {e}"
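For completeness, a quick sketch of exercising the toolkit directly, without going through tool calls from the LLM. It builds the keyframes mapping described above and passes it to generate_video; the Agent exists only so the tool has somewhere to attach the finished Video. The prompt and image URL are illustrative, and constructing a bare Agent like this is an assumption of the sketch, not something the cookbook does.

from phi.agent import Agent
from phi.tools.lumalab import LumaLabTools

luma_tools = LumaLabTools(wait_for_completion=True, poll_interval=3, max_wait_time=300)
agent = Agent(name="Direct Luma Caller", tools=[luma_tools])

# Text-to-video seeded with a single starting keyframe (illustrative URL).
status = luma_tools.generate_video(
    agent=agent,
    prompt="A hot air balloon drifting over snowy mountains at dawn",
    aspect_ratio="16:9",
    keyframes={"frame0": {"type": "image", "url": "https://example.com/balloon.jpg"}},
)
print(status)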