Skip to content

Commit 908fd7d

Browse files
authored
feat(api-nodes): new TextToVideoWithAudio and ImageToVideoWithAudio nodes (#11267)
1 parent 5495589 commit 908fd7d

File tree

2 files changed

+174
-23
lines changed

2 files changed

+174
-23
lines changed

comfy_api_nodes/apis/kling_api.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,25 +51,25 @@ class TaskStatusImageResult(BaseModel):
5151
url: str = Field(..., description="URL for generated image")
5252

5353

54-
class OmniTaskStatusResults(BaseModel):
54+
class TaskStatusResults(BaseModel):
5555
videos: list[TaskStatusVideoResult] | None = Field(None)
5656
images: list[TaskStatusImageResult] | None = Field(None)
5757

5858

59-
class OmniTaskStatusResponseData(BaseModel):
59+
class TaskStatusResponseData(BaseModel):
6060
created_at: int | None = Field(None, description="Task creation time")
6161
updated_at: int | None = Field(None, description="Task update time")
6262
task_status: str | None = None
6363
task_status_msg: str | None = Field(None, description="Additional failure reason. Only for polling endpoint.")
6464
task_id: str | None = Field(None, description="Task ID")
65-
task_result: OmniTaskStatusResults | None = Field(None)
65+
task_result: TaskStatusResults | None = Field(None)
6666

6767

68-
class OmniTaskStatusResponse(BaseModel):
68+
class TaskStatusResponse(BaseModel):
6969
code: int | None = Field(None, description="Error code")
7070
message: str | None = Field(None, description="Error message")
7171
request_id: str | None = Field(None, description="Request ID")
72-
data: OmniTaskStatusResponseData | None = Field(None)
72+
data: TaskStatusResponseData | None = Field(None)
7373

7474

7575
class OmniImageParamImage(BaseModel):
@@ -84,3 +84,21 @@ class OmniProImageRequest(BaseModel):
8484
mode: str = Field("pro")
8585
n: int | None = Field(1, le=9)
8686
image_list: list[OmniImageParamImage] | None = Field(..., max_length=10)
87+
88+
89+
class TextToVideoWithAudioRequest(BaseModel):
    # Request payload for Kling text-to-video generation with an optional
    # audio track. Every field is a plain string; the `description` texts
    # record the values the API is known to accept.

    # Required fields (no default).
    model_name: str = Field(description="kling-v2-6")
    aspect_ratio: str = Field(description="'16:9', '9:16' or '1:1'")
    duration: str = Field(description="'5' or '10'")
    prompt: str
    # Optional: falls back to "pro" when the caller does not set it.
    mode: str = "pro"
    # Required audio switch, sent as a string flag rather than a boolean.
    sound: str = Field(description="'on' or 'off'")
96+
97+
98+
class ImageToVideoWithAudioRequest(BaseModel):
    # Request payload for Kling image-to-video generation with an optional
    # audio track. Every field is a plain string; the `description` texts
    # record the values the API is known to accept.

    # Required fields (no default).
    model_name: str = Field(description="kling-v2-6")
    # Reference to the source image (the node passes the URL returned by
    # its image upload helper).
    image: str
    duration: str = Field(description="'5' or '10'")
    prompt: str
    # Optional: falls back to "pro" when the caller does not set it.
    mode: str = "pro"
    # Required audio switch, sent as a string flag rather than a boolean.
    sound: str = Field(description="'on' or 'off'")

comfy_api_nodes/nodes_kling.py

Lines changed: 151 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,16 @@
5050
KlingSingleImageEffectModelName,
5151
)
5252
from comfy_api_nodes.apis.kling_api import (
53+
ImageToVideoWithAudioRequest,
5354
OmniImageParamImage,
5455
OmniParamImage,
5556
OmniParamVideo,
5657
OmniProFirstLastFrameRequest,
5758
OmniProImageRequest,
5859
OmniProReferences2VideoRequest,
5960
OmniProText2VideoRequest,
60-
OmniTaskStatusResponse,
61+
TaskStatusResponse,
62+
TextToVideoWithAudioRequest,
6163
)
6264
from comfy_api_nodes.util import (
6365
ApiEndpoint,
@@ -242,15 +244,15 @@ def _video_repl(match):
242244
return re.sub(r"(?<!\w)@video(?P<idx>\d*)(?!\w)", _video_repl, prompt)
243245

244246

245-
async def finish_omni_video_task(cls: type[IO.ComfyNode], response: OmniTaskStatusResponse) -> IO.NodeOutput:
247+
async def finish_omni_video_task(cls: type[IO.ComfyNode], response: TaskStatusResponse) -> IO.NodeOutput:
246248
if response.code:
247249
raise RuntimeError(
248250
f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
249251
)
250252
final_response = await poll_op(
251253
cls,
252254
ApiEndpoint(path=f"/proxy/kling/v1/videos/omni-video/{response.data.task_id}"),
253-
response_model=OmniTaskStatusResponse,
255+
response_model=TaskStatusResponse,
254256
status_extractor=lambda r: (r.data.task_status if r.data else None),
255257
max_poll_attempts=160,
256258
)
@@ -483,12 +485,12 @@ async def execute_image2video(
483485
task_id = task_creation_response.data.task_id
484486

485487
final_response = await poll_op(
486-
cls,
487-
ApiEndpoint(path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}"),
488-
response_model=KlingImage2VideoResponse,
489-
estimated_duration=AVERAGE_DURATION_I2V,
490-
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
491-
)
488+
cls,
489+
ApiEndpoint(path=f"{PATH_IMAGE_TO_VIDEO}/{task_id}"),
490+
response_model=KlingImage2VideoResponse,
491+
estimated_duration=AVERAGE_DURATION_I2V,
492+
status_extractor=lambda r: (r.data.task_status.value if r.data and r.data.task_status else None),
493+
)
492494
validate_video_result_response(final_response)
493495

494496
video = get_video_from_response(final_response)
@@ -834,7 +836,7 @@ async def execute(
834836
response = await sync_op(
835837
cls,
836838
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
837-
response_model=OmniTaskStatusResponse,
839+
response_model=TaskStatusResponse,
838840
data=OmniProText2VideoRequest(
839841
model_name=model_name,
840842
prompt=prompt,
@@ -929,7 +931,7 @@ async def execute(
929931
response = await sync_op(
930932
cls,
931933
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
932-
response_model=OmniTaskStatusResponse,
934+
response_model=TaskStatusResponse,
933935
data=OmniProFirstLastFrameRequest(
934936
model_name=model_name,
935937
prompt=prompt,
@@ -997,7 +999,7 @@ async def execute(
997999
response = await sync_op(
9981000
cls,
9991001
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
1000-
response_model=OmniTaskStatusResponse,
1002+
response_model=TaskStatusResponse,
10011003
data=OmniProReferences2VideoRequest(
10021004
model_name=model_name,
10031005
prompt=prompt,
@@ -1081,7 +1083,7 @@ async def execute(
10811083
response = await sync_op(
10821084
cls,
10831085
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
1084-
response_model=OmniTaskStatusResponse,
1086+
response_model=TaskStatusResponse,
10851087
data=OmniProReferences2VideoRequest(
10861088
model_name=model_name,
10871089
prompt=prompt,
@@ -1162,7 +1164,7 @@ async def execute(
11621164
response = await sync_op(
11631165
cls,
11641166
ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"),
1165-
response_model=OmniTaskStatusResponse,
1167+
response_model=TaskStatusResponse,
11661168
data=OmniProReferences2VideoRequest(
11671169
model_name=model_name,
11681170
prompt=prompt,
@@ -1237,7 +1239,7 @@ async def execute(
12371239
response = await sync_op(
12381240
cls,
12391241
ApiEndpoint(path="/proxy/kling/v1/images/omni-image", method="POST"),
1240-
response_model=OmniTaskStatusResponse,
1242+
response_model=TaskStatusResponse,
12411243
data=OmniProImageRequest(
12421244
model_name=model_name,
12431245
prompt=prompt,
@@ -1253,7 +1255,7 @@ async def execute(
12531255
final_response = await poll_op(
12541256
cls,
12551257
ApiEndpoint(path=f"/proxy/kling/v1/images/omni-image/{response.data.task_id}"),
1256-
response_model=OmniTaskStatusResponse,
1258+
response_model=TaskStatusResponse,
12571259
status_extractor=lambda r: (r.data.task_status if r.data else None),
12581260
)
12591261
return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.task_result.images[0].url))
@@ -1328,9 +1330,8 @@ class KlingImage2VideoNode(IO.ComfyNode):
13281330
def define_schema(cls) -> IO.Schema:
13291331
return IO.Schema(
13301332
node_id="KlingImage2VideoNode",
1331-
display_name="Kling Image to Video",
1333+
display_name="Kling Image(First Frame) to Video",
13321334
category="api node/video/Kling",
1333-
description="Kling Image to Video Node",
13341335
inputs=[
13351336
IO.Image.Input("start_frame", tooltip="The reference image used to generate the video."),
13361337
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt"),
@@ -2034,6 +2035,136 @@ async def execute(
20342035
return IO.NodeOutput(await image_result_to_node_output(images))
20352036

20362037

2038+
class TextToVideoWithAudio(IO.ComfyNode):
    """Kling API node: generate a video from a text prompt, optionally with audio.

    The node submits a task to the Kling ``text2video`` proxy endpoint, polls
    that task until it reaches a terminal status, and returns the first
    generated video.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Describe the node's identity, inputs, outputs and hidden auth fields."""
        return IO.Schema(
            node_id="KlingTextToVideoWithAudio",
            display_name="Kling Text to Video with Audio",
            category="api node/video/Kling",
            inputs=[
                IO.Combo.Input("model_name", options=["kling-v2-6"]),
                IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt."),
                IO.Combo.Input("mode", options=["pro"]),
                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]),
                IO.Combo.Input("duration", options=[5, 10]),
                IO.Boolean.Input("generate_audio", default=True),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model_name: str,
        prompt: str,
        mode: str,
        aspect_ratio: str,
        duration: int,
        generate_audio: bool,
    ) -> IO.NodeOutput:
        """Create the generation task, wait for it to finish and return the video.

        Args:
            model_name: Kling model identifier selected in the node.
            prompt: Text prompt; checked with ``validate_string`` (length 1-2500).
            mode: Generation mode forwarded to the API unchanged.
            aspect_ratio: Aspect ratio string forwarded to the API unchanged.
            duration: Clip duration; sent to the API as a string.
            generate_audio: Mapped to the API's ``sound`` flag ("on"/"off").

        Returns:
            Node output wrapping the first video of the finished task.

        Raises:
            RuntimeError: If the API reports an error code, the creation
                response has no task ID, or the finished task has no video.
        """
        validate_string(prompt, min_length=1, max_length=2500)
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/kling/v1/videos/text2video", method="POST"),
            response_model=TaskStatusResponse,
            data=TextToVideoWithAudioRequest(
                model_name=model_name,
                prompt=prompt,
                mode=mode,
                aspect_ratio=aspect_ratio,
                duration=str(duration),
                sound="on" if generate_audio else "off",
            ),
        )
        if response.code:
            raise RuntimeError(
                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
            )
        # `data` and `task_id` are optional in the response model, so fail with a
        # readable error instead of an AttributeError when they are missing.
        if response.data is None or not response.data.task_id:
            raise RuntimeError(
                f"Kling request did not return a task ID. Message: {response.message}, Data: {response.data}"
            )
        final_response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/kling/v1/videos/text2video/{response.data.task_id}"),
            response_model=TaskStatusResponse,
            status_extractor=lambda r: (r.data.task_status if r.data else None),
        )
        # Every level of the result is optional in the model; guard before indexing.
        task_result = final_response.data.task_result if final_response.data else None
        videos = task_result.videos if task_result else None
        if not videos:
            raise RuntimeError(
                f"Kling task finished without a video result. "
                f"Message: {final_response.message}, Data: {final_response.data}"
            )
        return IO.NodeOutput(await download_url_to_video_output(videos[0].url))
2100+
2101+
2102+
class ImageToVideoWithAudio(IO.ComfyNode):
    """Kling API node: generate a video from a first-frame image, optionally with audio.

    The node uploads the start frame, submits a task to the Kling
    ``image2video`` proxy endpoint, polls that task until it reaches a terminal
    status, and returns the first generated video.
    """

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Describe the node's identity, inputs, outputs and hidden auth fields."""
        return IO.Schema(
            node_id="KlingImageToVideoWithAudio",
            display_name="Kling Image(First Frame) to Video with Audio",
            category="api node/video/Kling",
            inputs=[
                IO.Combo.Input("model_name", options=["kling-v2-6"]),
                IO.Image.Input("start_frame"),
                IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt."),
                IO.Combo.Input("mode", options=["pro"]),
                IO.Combo.Input("duration", options=[5, 10]),
                IO.Boolean.Input("generate_audio", default=True),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model_name: str,
        start_frame: Input.Image,
        prompt: str,
        mode: str,
        duration: int,
        generate_audio: bool,
    ) -> IO.NodeOutput:
        """Upload the start frame, create the task, wait for it and return the video.

        Args:
            model_name: Kling model identifier selected in the node.
            start_frame: Image used as the video's first frame; checked with
                ``validate_image_dimensions`` (min 300x300) and
                ``validate_image_aspect_ratio`` before upload.
            prompt: Text prompt; checked with ``validate_string`` (length 1-2500).
            mode: Generation mode forwarded to the API unchanged.
            duration: Clip duration; sent to the API as a string.
            generate_audio: Mapped to the API's ``sound`` flag ("on"/"off").

        Returns:
            Node output wrapping the first video of the finished task.

        Raises:
            RuntimeError: If the API reports an error code, the creation
                response has no task ID, or the finished task has no video.
        """
        validate_string(prompt, min_length=1, max_length=2500)
        validate_image_dimensions(start_frame, min_width=300, min_height=300)
        validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1))
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"),
            response_model=TaskStatusResponse,
            data=ImageToVideoWithAudioRequest(
                model_name=model_name,
                # The upload helper returns a list; the single frame is its first entry.
                image=(await upload_images_to_comfyapi(cls, start_frame))[0],
                prompt=prompt,
                mode=mode,
                duration=str(duration),
                sound="on" if generate_audio else "off",
            ),
        )
        if response.code:
            raise RuntimeError(
                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
            )
        # `data` and `task_id` are optional in the response model, so fail with a
        # readable error instead of an AttributeError when they are missing.
        if response.data is None or not response.data.task_id:
            raise RuntimeError(
                f"Kling request did not return a task ID. Message: {response.message}, Data: {response.data}"
            )
        final_response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"),
            response_model=TaskStatusResponse,
            status_extractor=lambda r: (r.data.task_status if r.data else None),
        )
        # Every level of the result is optional in the model; guard before indexing.
        task_result = final_response.data.task_result if final_response.data else None
        videos = task_result.videos if task_result else None
        if not videos:
            raise RuntimeError(
                f"Kling task finished without a video result. "
                f"Message: {final_response.message}, Data: {final_response.data}"
            )
        return IO.NodeOutput(await download_url_to_video_output(videos[0].url))
2166+
2167+
20372168
class KlingExtension(ComfyExtension):
20382169
@override
20392170
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -2057,6 +2188,8 @@ async def get_node_list(self) -> list[type[IO.ComfyNode]]:
20572188
OmniProVideoToVideoNode,
20582189
OmniProEditVideoNode,
20592190
OmniProImageNode,
2191+
TextToVideoWithAudio,
2192+
ImageToVideoWithAudio,
20602193
]
20612194

20622195

0 commit comments

Comments
 (0)