
Commit de812e9

Implement edit and fix generate
1 parent 9a73bdb commit de812e9

2 files changed: +203 additions, -39 deletions

README.md (0 additions, 1 deletion)

```diff
@@ -1,7 +1,6 @@
 # mcp-openai-image-generation
 
 [![Release](https://img.shields.io/github/v/release/ai-zerolab/mcp-openai-image-generation)](https://img.shields.io/github/v/release/ai-zerolab/mcp-openai-image-generation)
-[![Build status](https://img.shields.io/github/actions/workflow/status/ai-zerolab/mcp-openai-image-generation/main.yml?branch=main)](https://github.com/ai-zerolab/mcp-openai-image-generation/actions/workflows/main.yml?query=branch%3Amain)
 [![Commit activity](https://img.shields.io/github/commit-activity/m/ai-zerolab/mcp-openai-image-generation)](https://img.shields.io/github/commit-activity/m/ai-zerolab/mcp-openai-image-generation)
 [![License](https://img.shields.io/github/license/ai-zerolab/mcp-openai-image-generation)](https://img.shields.io/github/license/ai-zerolab/mcp-openai-image-generation)
 
```
mcp_openai_image_generation/app.py (203 additions, 38 deletions)

```diff
@@ -1,6 +1,8 @@
 import base64
+import os
 import uuid
 from pathlib import Path
+from typing import Annotated
 
 from mcp.server.fastmcp import FastMCP
 from mcp.types import ImageContent
@@ -9,58 +11,218 @@
 
 mcp = FastMCP("openai-image-generation")
 client = OpenAI()
+DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-image-1")
 
 
 @mcp.tool(
     description="Generate an image with OpenAI model, save or display it. "
     "For saving, use the `output_dir` parameter."
 )
 def generate_image(
-    prompt: str = Field(..., description=""),
-    background: str | None = Field(
-        None,
-        description="Allows to set transparency for the background of the generated image(s). "
-        "This parameter is only supported for `gpt-image-1`. "
-        "Must be one of `transparent`, `opaque` or `auto` (default value). "
-        "When `auto` is used, the model will automatically determine the best background for the image.",
-    ),
-    n: int | None = Field(
-        1,
-        description="The number of images to generate. Must be between 1 and 10. For `dall-e-3`, "
-        "only `n=1` is supported.",
-    ),
-    model: str | None = Field(
-        "gpt-image-1",
-        description='Must be one of ["dall-e-2", "dall-e-3", "gpt-image-1"]',
-    ),
-    output_format: str | None = Field(
-        "png",
-        description="The format in which the generated images are returned. "
-        "This parameter is only supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.",
-    ),
-    size: str | None = Field(
-        "auto",
-        description="The size of the generated images. "
-        "Must be one of `1024x1024`, `1536x1024` (landscape), "
-        "`1024x1536` (portrait), or `auto` (default value) for `gpt-image-1`, "
-        "one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, "
-        "and one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.",
-    ),
-    output_dir: str | None = Field(
-        None,
-        description="The directory to save the generated image(s). If not provided, the image(s) will be displayed.",
-    ),
-) -> list[ImageContent] | dict:
-    result = client.images.generate(
+    prompt: Annotated[str, Field(description="A text description of the desired image(s).")],
+    background: Annotated[
+        str | None,
+        Field(
+            description="Allows to set transparency for the background of the generated image(s). "
+            "This parameter is only supported for `gpt-image-1`. "
+            "Must be one of `transparent`, `opaque` or `auto` (default value). "
+            "When `auto` is used, the model will automatically determine the best background for the image.",
+        ),
+    ] = None,
+    n: Annotated[
+        int | None,
+        Field(
+            description="The number of images to generate. Must be between 1 and 10. For `dall-e-3`, "
+            "only `n=1` is supported.",
+        ),
+    ] = 1,
+    quality: Annotated[
+        str | None,
+        Field(
+            description="""The quality of the image that will be generated.
+
+- `auto` (default value) will automatically select the best quality for the given model.
+- `high`, `medium` and `low` are supported for `gpt-image-1`.
+- `hd` and `standard` are supported for `dall-e-3`.
+- `standard` is the only option for `dall-e-2`.
+"""
+        ),
+    ] = "auto",
+    model: Annotated[
+        str,
+        Field(
+            description='Should be one of ["dall-e-2", "dall-e-3", "gpt-image-1"]',
+        ),
+    ] = DEFAULT_MODEL,
+    output_format: Annotated[
+        str,
+        Field(
+            description="The format in which the generated images are returned. "
+            "This parameter is only supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.",
+        ),
+    ] = "png",
+    size: Annotated[
+        str,
+        Field(
+            description="The size of the generated images. "
+            "Must be one of `1024x1024`, `1536x1024` (landscape), "
+            "`1024x1536` (portrait), or `auto` (default value) for `gpt-image-1`, "
+            "one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, "
+            "and one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.",
+        ),
+    ] = "auto",
+    output_dir: Annotated[
+        str | None,
+        Field(
+            description="The directory to save the generated image(s). If not provided, the image(s) will be displayed.",
+        ),
+    ] = None,
+) -> ImageContent | list[ImageContent] | dict:
+    response = client.images.generate(
         prompt=prompt,
         background=background,
+        n=n,
+        quality=quality,
         model=model,
         output_format=output_format,
         size=size,
     )
+    if not response.data:
+        return {"generated_images": [], "message": "No images generated"}
+
+    case_id = uuid.uuid4().hex
+    result = []
+    for count, image in enumerate(response.data):
+        image_base64 = image.b64_json
+        if output_dir:
+            image_bytes = base64.b64decode(image_base64)
+            output_dir: Path = Path(output_dir)
+            output_dir.mkdir(parents=True, exist_ok=True)
+            output_path = output_dir / f"{case_id}-{count}.{output_format}"
+            output_path.write_bytes(image_bytes)
+            result.append(output_path.absolute().as_posix())
+        else:
+            result.append(
+                ImageContent(
+                    type="image",
+                    data=image_base64,
+                    mimeType=f"image/{output_format}",
+                    annotations={"case_id": case_id, "count": count, "prompt": prompt},
+                )
+            )
+    if len(result) == 1:
+        result = result[0]
+    return {"generated_images": result} if output_dir else result
+
+
+# TODO: Edit, inpainting,
+
+
+@mcp.tool(
+    description="Edit an image with OpenAI model, save or display it. "
+    "For saving, use the `output_dir` parameter."
+    "You can use one or more images as a reference to generate a new image, "
+    "or edit an image using a mask(inpainting). "
+    "For inpainting, if you provide multiple input images, the `mask` will be applied to the first image."
+)
+def edit_image(
+    prompt: Annotated[
+        str,
+        Field(
+            description="The prompt to generate the image.",
+        ),
+    ],
+    images: Annotated[
+        list[str],
+        Field(
+            description="The image(s) to edit. Must be a supported image file or an array of images. Use absolute paths.",
+        ),
+    ],
+    mask: Annotated[
+        str | None,
+        Field(
+            description="The mask to apply to the image(s). Must be a supported image file. Use absolute paths.",
+        ),
+    ] = None,
+    background: Annotated[
+        str | None,
+        Field(
+            description="Allows to set transparency for the background of the generated image(s). "
+            "This parameter is only supported for `gpt-image-1`. "
+            "Must be one of `transparent`, `opaque` or `auto` (default value). "
+            "When `auto` is used, the model will automatically determine the best background for the image.",
+        ),
+    ] = None,
+    n: Annotated[
+        int | None,
+        Field(
+            description="The number of images to generate. Must be between 1 and 10. For `dall-e-3`, "
+            "only `n=1` is supported.",
+        ),
+    ] = 1,
+    quality: Annotated[
+        str | None,
+        Field(
+            description="""The quality of the image that will be generated.
+
+- `auto` (default value) will automatically select the best quality for the given model.
+- `high`, `medium` and `low` are supported for `gpt-image-1`.
+- `hd` and `standard` are supported for `dall-e-3`.
+- `standard` is the only option for `dall-e-2`.
+"""
+        ),
+    ] = "auto",
+    model: Annotated[
+        str,
+        Field(
+            description='Should be one of ["dall-e-2", "dall-e-3", "gpt-image-1"]',
+        ),
+    ] = DEFAULT_MODEL,
+    output_format: Annotated[
+        str,
+        Field(
+            description="The format in which the generated images are returned. "
+            "This parameter is only supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.",
+        ),
+    ] = "png",
+    size: Annotated[
+        str,
+        Field(
+            description="The size of the generated images. "
+            "Must be one of `1024x1024`, `1536x1024` (landscape), "
+            "`1024x1536` (portrait), or `auto` (default value) for `gpt-image-1`, "
+            "one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, "
+            "and one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.",
+        ),
+    ] = "auto",
+    output_dir: Annotated[
+        str | None,
+        Field(
+            description="The directory to save the generated images. "
+            "If not provided, the generated images will be returned as a list of ImageContent objects.",
+        ),
+    ] = None,
+) -> list[ImageContent] | ImageContent | dict:
+    images = [open(image, "rb") for image in images]  # noqa: SIM115
+    mask = open(mask, "rb") if mask else None  # noqa: SIM115
+    response = client.images.edit(
+        image=images,
+        prompt=prompt,
+        background=background,
+        mask=mask,
+        quality=quality,
+        model=model,
+        n=n,
+        response_format=output_format,
+        size=size,
+    )
+
+    if not response.data:
+        return {"generated_images": [], "message": "No images generated"}
+
     case_id = uuid.uuid4().hex
     result = []
-    for count, image in enumerate(result.data):
+    for count, image in enumerate(response.data):
         image_base64 = image.b64_json
         if output_dir:
             image_bytes = base64.b64decode(image_base64)
@@ -72,9 +234,12 @@ def generate_image(
         else:
             result.append(
                 ImageContent(
+                    type="image",
                     data=image_base64,
                     mimeType=f"image/{output_format}",
                     annotations={"case_id": case_id, "count": count, "prompt": prompt},
                 )
             )
-    return result if output_dir else {"generated_images": result}
+    if len(result) == 1:
+        result = result[0]
+    return {"generated_images": result} if output_dir else result
```
