Commit 3a9926d

feat(api-nodes): add GPT-Image-1.5
1 parent 827bb15 commit 3a9926d

File tree

3 files changed: +153 -98 lines changed


comfy_api_nodes/apis/openai_api.py

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+from pydantic import BaseModel, Field
+
+
+class Datum2(BaseModel):
+    b64_json: str | None = Field(None, description="Base64 encoded image data")
+    revised_prompt: str | None = Field(None, description="Revised prompt")
+    url: str | None = Field(None, description="URL of the image")
+
+
+class InputTokensDetails(BaseModel):
+    image_tokens: int | None = None
+    text_tokens: int | None = None
+
+
+class Usage(BaseModel):
+    input_tokens: int | None = None
+    input_tokens_details: InputTokensDetails | None = None
+    output_tokens: int | None = None
+    total_tokens: int | None = None
+
+
+class OpenAIImageGenerationResponse(BaseModel):
+    data: list[Datum2] | None = None
+    usage: Usage | None = None
+
+
+class OpenAIImageEditRequest(BaseModel):
+    background: str | None = Field(None, description="Background transparency")
+    model: str = Field(...)
+    moderation: str | None = Field(None)
+    n: int | None = Field(None, description="The number of images to generate")
+    output_compression: int | None = Field(None, description="Compression level for JPEG or WebP (0-100)")
+    output_format: str | None = Field(None)
+    prompt: str = Field(...)
+    quality: str | None = Field(None, description="The quality of the generated image")
+    size: str | None = Field(None, description="Size of the output image")
+
+
+class OpenAIImageGenerationRequest(BaseModel):
+    background: str | None = Field(None, description="Background transparency")
+    model: str | None = Field(None)
+    moderation: str | None = Field(None)
+    n: int | None = Field(
+        None,
+        description="The number of images to generate.",
+    )
+    output_compression: int | None = Field(None, description="Compression level for JPEG or WebP (0-100)")
+    output_format: str | None = Field(None)
+    prompt: str = Field(...)
+    quality: str | None = Field(None, description="The quality of the generated image")
+    size: str | None = Field(None, description="Size of the image (e.g., 1024x1024, 1536x1024, auto)")
+    style: str | None = Field(None, description="Style of the image (only for dall-e-3)")
comfy_api_nodes/nodes_openai.py

Lines changed: 100 additions & 97 deletions
@@ -1,46 +1,45 @@
-from io import BytesIO
+import base64
 import os
 from enum import Enum
-from inspect import cleandoc
+from io import BytesIO
+
 import numpy as np
 import torch
 from PIL import Image
-import folder_paths
-import base64
-from comfy_api.latest import IO, ComfyExtension
 from typing_extensions import override

-
+import folder_paths
+from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api_nodes.apis import (
-    OpenAIImageGenerationRequest,
-    OpenAIImageEditRequest,
-    OpenAIImageGenerationResponse,
-    OpenAICreateResponse,
-    OpenAIResponse,
     CreateModelResponseProperties,
-    Item,
-    OutputContent,
-    InputImageContent,
     Detail,
-    InputTextContent,
-    InputMessage,
-    InputMessageContentList,
     InputContent,
     InputFileContent,
+    InputImageContent,
+    InputMessage,
+    InputMessageContentList,
+    InputTextContent,
+    Item,
+    OpenAICreateResponse,
+    OpenAIResponse,
+    OutputContent,
+)
+from comfy_api_nodes.apis.openai_api import (
+    OpenAIImageEditRequest,
+    OpenAIImageGenerationRequest,
+    OpenAIImageGenerationResponse,
 )
-
 from comfy_api_nodes.util import (
-    downscale_image_tensor,
-    download_url_to_bytesio,
-    validate_string,
-    tensor_to_base64_string,
     ApiEndpoint,
-    sync_op,
+    download_url_to_bytesio,
+    downscale_image_tensor,
     poll_op,
+    sync_op,
+    tensor_to_base64_string,
     text_filepath_to_data_uri,
+    validate_string,
 )

-
 RESPONSES_ENDPOINT = "/proxy/openai/v1/responses"
 STARTING_POINT_ID_PATTERN = r"<starting_point_id:(.*)>"

@@ -98,17 +97,14 @@ async def validate_and_cast_response(response, timeout: int = None) -> torch.Tensor:


 class OpenAIDalle2(IO.ComfyNode):
-    """
-    Generates images synchronously via OpenAI's DALL·E 2 endpoint.
-    """

     @classmethod
     def define_schema(cls):
         return IO.Schema(
             node_id="OpenAIDalle2",
             display_name="OpenAI DALL·E 2",
             category="api node/image/OpenAI",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates images synchronously via OpenAI's DALL·E 2 endpoint.",
             inputs=[
                 IO.String.Input(
                     "prompt",

@@ -234,17 +230,14 @@ async def execute(


 class OpenAIDalle3(IO.ComfyNode):
-    """
-    Generates images synchronously via OpenAI's DALL·E 3 endpoint.
-    """

     @classmethod
     def define_schema(cls):
         return IO.Schema(
             node_id="OpenAIDalle3",
             display_name="OpenAI DALL·E 3",
             category="api node/image/OpenAI",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates images synchronously via OpenAI's DALL·E 3 endpoint.",
             inputs=[
                 IO.String.Input(
                     "prompt",

@@ -327,23 +320,20 @@ async def execute(


 class OpenAIGPTImage1(IO.ComfyNode):
-    """
-    Generates images synchronously via OpenAI's GPT Image 1 endpoint.
-    """

     @classmethod
     def define_schema(cls):
         return IO.Schema(
             node_id="OpenAIGPTImage1",
             display_name="OpenAI GPT Image 1",
             category="api node/image/OpenAI",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates images synchronously via OpenAI's GPT Image 1 endpoint.",
             inputs=[
                 IO.String.Input(
                     "prompt",
                     default="",
                     multiline=True,
-                    tooltip="Text prompt for GPT Image 1",
+                    tooltip="Text prompt for GPT Image",
                 ),
                 IO.Int.Input(
                     "seed",

@@ -365,8 +355,8 @@ def define_schema(cls):
                 ),
                 IO.Combo.Input(
                     "background",
-                    default="opaque",
-                    options=["opaque", "transparent"],
+                    default="auto",
+                    options=["auto", "opaque", "transparent"],
                     tooltip="Return image with or without background",
                     optional=True,
                 ),

@@ -397,6 +387,11 @@ def define_schema(cls):
                     tooltip="Optional mask for inpainting (white areas will be replaced)",
                     optional=True,
                 ),
+                IO.Combo.Input(
+                    "model",
+                    options=["gpt-image-1", "gpt-image-1.5"],
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Image.Output(),

@@ -412,32 +407,27 @@ def define_schema(cls):
     @classmethod
     async def execute(
         cls,
-        prompt,
-        seed=0,
-        quality="low",
-        background="opaque",
-        image=None,
-        mask=None,
-        n=1,
-        size="1024x1024",
+        prompt: str,
+        seed: int = 0,
+        quality: str = "low",
+        background: str = "opaque",
+        image: Input.Image | None = None,
+        mask: Input.Image | None = None,
+        n: int = 1,
+        size: str = "1024x1024",
+        model: str = "gpt-image-1",
     ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
-        model = "gpt-image-1"
-        path = "/proxy/openai/images/generations"
-        content_type = "application/json"
-        request_class = OpenAIImageGenerationRequest
-        files = []

-        if image is not None:
-            path = "/proxy/openai/images/edits"
-            request_class = OpenAIImageEditRequest
-            content_type = "multipart/form-data"
+        if mask is not None and image is None:
+            raise ValueError("Cannot use a mask without an input image")

+        if image is not None:
+            files = []
             batch_size = image.shape[0]
-
             for i in range(batch_size):
-                single_image = image[i : i + 1]
-                scaled_image = downscale_image_tensor(single_image).squeeze()
+                single_image = image[i: i + 1]
+                scaled_image = downscale_image_tensor(single_image, total_pixels=2048*2048).squeeze()

                 image_np = (scaled_image.numpy() * 255).astype(np.uint8)
                 img = Image.fromarray(image_np)

@@ -450,44 +440,57 @@ async def execute(
                 else:
                     files.append(("image[]", (f"image_{i}.png", img_byte_arr, "image/png")))

-        if mask is not None:
-            if image is None:
-                raise Exception("Cannot use a mask without an input image")
-            if image.shape[0] != 1:
-                raise Exception("Cannot use a mask with multiple image")
-            if mask.shape[1:] != image.shape[1:-1]:
-                raise Exception("Mask and Image must be the same size")
-            batch, height, width = mask.shape
-            rgba_mask = torch.zeros(height, width, 4, device="cpu")
-            rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()
-
-            scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0)).squeeze()
-
-            mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
-            mask_img = Image.fromarray(mask_np)
-            mask_img_byte_arr = BytesIO()
-            mask_img.save(mask_img_byte_arr, format="PNG")
-            mask_img_byte_arr.seek(0)
-            files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png")))
-
-        # Build the operation
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path=path, method="POST"),
-            response_model=OpenAIImageGenerationResponse,
-            data=request_class(
-                model=model,
-                prompt=prompt,
-                quality=quality,
-                background=background,
-                n=n,
-                seed=seed,
-                size=size,
-            ),
-            files=files if files else None,
-            content_type=content_type,
-        )
-
+            if mask is not None:
+                if image.shape[0] != 1:
+                    raise Exception("Cannot use a mask with multiple images")
+                if mask.shape[1:] != image.shape[1:-1]:
+                    raise Exception("Mask and Image must be the same size")
+                _, height, width = mask.shape
+                rgba_mask = torch.zeros(height, width, 4, device="cpu")
+                rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()
+
+                scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0), total_pixels=2048*2048).squeeze()
+
+                mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
+                mask_img = Image.fromarray(mask_np)
+                mask_img_byte_arr = BytesIO()
+                mask_img.save(mask_img_byte_arr, format="PNG")
+                mask_img_byte_arr.seek(0)
+                files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png")))
+
+            response = await sync_op(
+                cls,
+                ApiEndpoint(path="/proxy/openai/images/edits", method="POST"),
+                response_model=OpenAIImageGenerationResponse,
+                data=OpenAIImageEditRequest(
+                    model=model,
+                    prompt=prompt,
+                    quality=quality,
+                    background=background,
+                    n=n,
+                    seed=seed,
+                    size=size,
+                    moderation="low",
+                ),
+                content_type="multipart/form-data",
+                files=files,
+            )
+        else:
+            response = await sync_op(
+                cls,
+                ApiEndpoint(path="/proxy/openai/images/generations", method="POST"),
+                response_model=OpenAIImageGenerationResponse,
+                data=OpenAIImageGenerationRequest(
+                    model=model,
+                    prompt=prompt,
+                    quality=quality,
+                    background=background,
+                    n=n,
+                    seed=seed,
+                    size=size,
+                    moderation="low",
+                ),
+            )
         return IO.NodeOutput(await validate_and_cast_response(response))

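A note on the mask path above: ComfyUI masks arrive as float tensors of shape (1, H, W) with 1.0 marking the region to repaint, while the OpenAI edits endpoint expects a PNG whose transparent pixels mark the editable area. A self-contained sketch of that conversion (the same alpha = 1 - mask trick as the node, minus the downscaling and batch checks; the helper name is illustrative):

import numpy as np
import torch
from io import BytesIO
from PIL import Image

def mask_to_rgba_png(mask: torch.Tensor) -> BytesIO:
    # mask: (1, H, W) float tensor, 1.0 = pixel may be repainted.
    # Alpha gets 1 - mask: opaque pixels are kept, transparent ones
    # are open for editing, matching the endpoint's convention.
    _, height, width = mask.shape
    rgba = torch.zeros(height, width, 4)
    rgba[:, :, 3] = 1 - mask.squeeze().cpu()
    arr = (rgba.numpy() * 255).astype(np.uint8)
    buf = BytesIO()
    Image.fromarray(arr).save(buf, format="PNG")  # (H, W, 4) uint8 -> RGBA PNG
    buf.seek(0)
    return buf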

comfy_api_nodes/util/conversions.py

Lines changed: 1 addition & 1 deletion
@@ -129,7 +129,7 @@ def pil_to_bytesio(img: Image.Image, mime_type: str = "image/png") -> BytesIO:
     return img_byte_arr


-def downscale_image_tensor(image, total_pixels=1536 * 1024) -> torch.Tensor:
+def downscale_image_tensor(image: torch.Tensor, total_pixels: int = 1536 * 1024) -> torch.Tensor:
     """Downscale input image tensor to roughly the specified total pixels."""
     samples = image.movedim(-1, 1)
     total = int(total_pixels)
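
The GPT Image node now calls this helper with total_pixels=2048*2048, raising the cap from the 1536*1024 default. Per the docstring, the helper targets a total pixel budget; scaling both axes by sqrt(total_pixels / (H * W)) preserves the aspect ratio while landing near that budget. A rough sketch of that math (assuming bilinear resampling and a pass-through for images already under budget; the real helper may differ in both respects):

import math
import torch
import torch.nn.functional as F

def downscale_sketch(image: torch.Tensor, total_pixels: int = 1536 * 1024) -> torch.Tensor:
    # image: (B, H, W, C) floats in [0, 1], the layout ComfyUI passes between nodes.
    samples = image.movedim(-1, 1)  # -> (B, C, H, W) for interpolate()
    height, width = samples.shape[2], samples.shape[3]
    if height * width <= total_pixels:
        return image  # assumed: small images pass through unchanged
    scale = math.sqrt(total_pixels / (height * width))  # same factor on both axes
    new_h, new_w = round(height * scale), round(width * scale)
    samples = F.interpolate(samples, size=(new_h, new_w), mode="bilinear", align_corners=False)
    return samples.movedim(1, -1)  # back to (B, H, W, C)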

0 commit comments
