
Commit ba237cf

feat(genai): add new code-execution code samples (#13763)
* feat(genai): Add 3 new CodeExecution samples (new folder). These code samples demonstrate Gemini's ability to dynamically generate the required code and then execute it.
* fix(genai): update the annotate code sample prompts
* chore(genai): add example response & input image reference.
* chore(genai): add example output for the barplot example.
* chore(genai): add example output for the cropimage example.
* fix(genai): fix region tags
* cleanup(genai): clean up old files
* fix(genai): fix lint error
* chore(genai): update config to limit tests to just Python 3.12
* fix: add Pillow to requirements.txt
* fix: change copyright license year to 2026. Also update template folder files.
* chore: remove the images_folder. Simplify the code footprint by using samples available in the GCS bucket.
* chore: roll back the year update for template folder files.

Co-authored-by: Jennifer Davis <sigje@google.com>
1 parent eeb7fe7 commit ba237cf
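
The samples added in this commit share the same core flow: send an image plus a text prompt to generate_content with the code-execution tool enabled, then walk the response parts to print the model-generated Python, its execution output, and any returned image. As a minimal, text-only sketch of that shared core (not part of the commit itself; the prompt is a placeholder and the full samples below add image input and image saving):

# Minimal sketch of the code-execution pattern used by these samples.
# Assumes the google-genai SDK is installed and the client is configured via environment.
from google import genai
from google.genai import types

client = genai.Client()
response = client.models.generate_content(
    model="gemini-3-flash-preview",  # same model as the samples in this commit
    contents=["Write and run Python code to compute the 20th Fibonacci number."],  # placeholder prompt
    config=types.GenerateContentConfig(
        tools=[types.Tool(code_execution=types.ToolCodeExecution)]  # enable the code-execution tool
    ),
)
for part in response.candidates[0].content.parts:
    if part.executable_code is not None:
        print(part.executable_code.code)  # Python generated by the model
    if part.code_execution_result is not None:
        print(part.code_execution_result.output)  # output from running that code server-side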

File tree: 7 files changed, +484 -0 lines changed

Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def generate_content() -> bool:
    # [START googlegenaisdk_codeexecution_annotateimage_with_txt_gcsimg]
    import io
    from PIL import Image
    from google import genai
    from google.genai import types

    client = genai.Client()

    response = client.models.generate_content(
        model="gemini-3-flash-preview",
        contents=[
            types.Part.from_uri(
                file_uri="https://storage.googleapis.com/cloud-samples-data/generative-ai/image/robotic.jpeg",
                mime_type="image/jpeg",
            ),
            "Annotate on the image with arrows of different colors, which object should go into which bin.",
        ],
        config=types.GenerateContentConfig(tools=[types.Tool(code_execution=types.ToolCodeExecution)]),
    )

    img_count = 0
    for part in response.candidates[0].content.parts:
        if part.text is not None:
            print(part.text)
        if part.executable_code is not None:
            print("####################### 1. Generate Python Code #######################")
            print(part.executable_code.code)
        if part.code_execution_result is not None:
            print("####################### 2. Executing Python Code #######################")
            print(part.code_execution_result.output)
        # For local executions, save the output to a local filename
        if part.as_image() is not None:
            print("####################### 3. Save Output #######################")
            img_count += 1
            output_location = f"robotic-annotate-output-{img_count}.jpg"
            image_data = part.as_image().image_bytes
            image = Image.open(io.BytesIO(image_data))
            image = image.convert("RGB")
            image.save(output_location)
            print(f"Output is saved to {output_location}")
    # Example response:
    # ####################### 1. Generate Python Code #######################
    # import PIL.Image
    # import PIL.ImageDraw
    #
    # # Load the image to get dimensions
    # img = PIL.Image.open('f_https___storage.googleapis.com_cloud_samples_data_generative_ai_image_robotic.jpeg')
    # width, height = img.size
    #
    # # Define objects and bins with normalized coordinates [ymin, xmin, ymax, xmax]
    # bins = {
    #     'light_blue': [118, 308, 338, 436],
    #     'green': [248, 678, 458, 831],
    #     'black': [645, 407, 898, 578]
    # }
    #
    # objects = [
    #     {'name': 'green pepper', 'box': [256, 482, 296, 546], 'target': 'green'},
    #     {'name': 'red pepper', 'box': [317, 478, 349, 544], 'target': 'green'},
    #     {'name': 'grapes', 'box': [584, 555, 664, 593], 'target': 'green'},
    #     {'name': 'cherries', 'box': [463, 671, 511, 718], 'target': 'green'},
    #     {'name': 'soda can', 'box': [397, 524, 489, 605], 'target': 'light_blue'},
    #     {'name': 'brown snack', 'box': [397, 422, 475, 503], 'target': 'black'},
    #     {'name': 'welch snack', 'box': [520, 466, 600, 543], 'target': 'black'},
    #     {'name': 'paper towel', 'box': [179, 564, 250, 607], 'target': 'black'},
    #     {'name': 'plastic cup', 'box': [271, 587, 346, 643], 'target': 'black'},
    # ]
    #
    # # Helper to get center of a normalized box
    # def get_center(box):
    #     ymin, xmin, ymax, xmax = box
    #     return ((xmin + xmax) / 2000 * width, (ymin + ymax) / 2000 * height)
    #
    # draw = PIL.ImageDraw.Draw(img)
    #
    # # Define arrow colors based on target bin
    # colors = {
    #     'green': 'green',
    #     'light_blue': 'blue',
    #     'black': 'red'
    # }
    #
    # for obj in objects:
    #     start_point = get_center(obj['box'])
    #     end_point = get_center(bins[obj['target']])
    #     color = colors[obj['target']]
    #     # Drawing a line with an arrow head (simulated with a few extra lines)
    #     draw.line([start_point, end_point], fill=color, width=5)
    #     # Simple arrowhead
    #     import math
    #     angle = math.atan2(end_point[1] - start_point[1], end_point[0] - start_point[0])
    #     arrow_len = 20
    #     p1 = (end_point[0] - arrow_len * math.cos(angle - math.pi / 6),
    #           end_point[1] - arrow_len * math.sin(angle - math.pi / 6))
    #     p2 = (end_point[0] - arrow_len * math.cos(angle + math.pi / 6),
    #           end_point[1] - arrow_len * math.sin(angle + math.pi / 6))
    #     draw.line([end_point, p1], fill=color, width=5)
    #     draw.line([end_point, p2], fill=color, width=5)
    #
    # img.save('annotated_robotic.jpeg')
    #
    # # Also list detections for confirmation
    # # [
    # #   {"box_2d": [118, 308, 338, 436], "label": "light blue bin"},
    # #   {"box_2d": [248, 678, 458, 831], "label": "green bin"},
    # #   {"box_2d": [645, 407, 898, 578], "label": "black bin"},
    # #   {"box_2d": [256, 482, 296, 546], "label": "green pepper"},
    # #   {"box_2d": [317, 478, 349, 544], "label": "red pepper"},
    # #   {"box_2d": [584, 555, 664, 593], "label": "grapes"},
    # #   {"box_2d": [463, 671, 511, 718], "label": "cherries"},
    # #   {"box_2d": [397, 524, 489, 605], "label": "soda can"},
    # #   {"box_2d": [397, 422, 475, 503], "label": "brown snack"},
    # #   {"box_2d": [520, 466, 600, 543], "label": "welch snack"},
    # #   {"box_2d": [179, 564, 250, 607], "label": "paper towel"},
    # #   {"box_2d": [271, 587, 346, 643], "label": "plastic cup"}
    # # ]
    #
    # ####################### 2. Executing Python Code #######################
    # None
    # ####################### 3. Save Output #######################
    # Output is saved to robotic-annotate-output-1.jpg
    # The image has been annotated with arrows indicating the appropriate bin for each object based on standard waste sorting practices:
    #
    # - **Green Arrows (Compost):** Organic items such as the green pepper, red pepper, grapes, and cherries are directed to the **green bin**.
    # - **Blue Arrow (Recycling):** The crushed soda can is directed to the **light blue bin**.
    # - **Red Arrows (Trash/Landfill):** Non-recyclable or contaminated items like the snack wrappers (brown and Welch's), the white paper towel, and the small plastic cup are directed to the **black bin**.
    #
    # These categorizations follow common sorting rules where green is for organics, blue for recyclables, and black for general waste.
    # [END googlegenaisdk_codeexecution_annotateimage_with_txt_gcsimg]
    return True


if __name__ == "__main__":
    generate_content()
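
Usage note: the sample above constructs genai.Client() with no arguments, so it relies on environment configuration. A sketch of one way to run it locally follows; the environment variable values and the module filename are assumptions and are not shown anywhere in this commit.

import os

# Assumed environment setup for the google-genai client when targeting Vertex AI;
# substitute your own project ID and location (placeholders, not from this commit).
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True"
os.environ["GOOGLE_CLOUD_PROJECT"] = "your-project-id"
os.environ["GOOGLE_CLOUD_LOCATION"] = "global"

# Hypothetical module name; the actual sample filename is not visible in this rendering of the diff.
from codeexecution_annotateimage_with_txt_gcsimg import generate_content

generate_content()  # prints the generated code and execution output, then saves the annotated image locally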
Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def generate_content() -> bool:
    # [START googlegenaisdk_codeexecution_barplot_with_txt_img]
    import io
    from PIL import Image
    from google import genai
    from google.genai import types

    # Use the benchmark image in Cloud Storage
    image = types.Part.from_uri(
        file_uri="https://storage.googleapis.com/cloud-samples-data/generative-ai/image/benchmark.jpeg",
        mime_type="image/jpeg",
    )

    client = genai.Client()

    response = client.models.generate_content(
        model="gemini-3-flash-preview",
        contents=[
            image,
            "Make a bar chart of per-category performance, normalize prior SOTA as 1.0 for each task, "
            "then take average per-category. Plot using matplotlib with nice style.",
        ],
        config=types.GenerateContentConfig(tools=[types.Tool(code_execution=types.ToolCodeExecution)]),
    )

    img_count = 0
    for part in response.candidates[0].content.parts:
        if part.text is not None:
            print(part.text)
        if part.executable_code is not None:
            print("####################### 1. Generate Python Code #######################")
            print(part.executable_code.code)
        if part.code_execution_result is not None:
            print("####################### 2. Executing Python Code #######################")
            print(part.code_execution_result.output)
        # For local executions, save the output to a local filename
        if part.as_image() is not None:
            print("####################### 3. Save Output #######################")
            img_count += 1
            output_location = f"output-barplot-{img_count}.jpg"
            image_data = part.as_image().image_bytes
            image = Image.open(io.BytesIO(image_data))
            image = image.convert("RGB")
            image.save(output_location)
            print(f"Output is saved to {output_location}")
    # Example response:
    # ####################### 1. Generate Python Code #######################
    # import matplotlib.pyplot as plt
    # import numpy as np
    #
    # data = [
    #     # Category, Benchmark, G3P, G2.5P, C4.5, GPT5.1, lower_is_better
    #     ("Visual Reasoning", "MMMU Pro", 81.0, 68.0, 72.0, 76.0, False),
    #     ("Visual Reasoning", "VLMsAreBiased", 50.6, 24.3, 32.7, 21.7, False),
    #     ("Document", "CharXiv Reasoning", 81.4, 69.6, 67.2, 69.5, False),
    #     ("Document", "OmniDocBench1.5*", 0.115, 0.145, 0.120, 0.147, True),
    #     ("Spatial", "ERQA", 70.5, 56.0, 51.3, 60.0, False),
    #     ("Spatial", "Point-Bench", 85.5, 62.7, 38.5, 41.8, False),
    #     ("Spatial", "RefSpatial", 65.5, 33.6, 19.5, 28.2, False),
    #     ("Spatial", "CV-Bench", 92.0, 85.9, 83.8, 84.6, False),
    #     ("Spatial", "MindCube", 77.7, 57.5, 58.5, 61.7, False),
    #     ("Screen", "ScreenSpot Pro", 72.7, 11.4, 49.9, 3.50, False),
    #     ("Screen", "Gui-World QA", 68.0, 42.8, 44.9, 38.7, False),
    #     ("Video", "Video-MMMU", 87.6, 83.6, 84.4, 80.4, False),
    #     ("Video", "Video-MME", 88.4, 86.9, 84.1, 86.3, False),
    #     ("Video", "1H-VideoQA", 81.8, 79.4, 52.0, 61.5, False),
    #     ("Video", "Perception Test", 80.0, 78.4, 74.1, 77.8, False),
    #     ("Video", "YouCook2", 222.7, 188.3, 145.8, 132.4, False),
    #     ("Video", "Vatex", 77.4, 71.3, 60.1, 62.9, False),
    #     ("Video", "Motion Bench", 70.3, 66.3, 65.9, 61.1, False),
    #     ("Education", "Math Kangaroo", 84.4, 77.4, 68.9, 79.9, False),
    #     ("Biomedical", "MedXpertQA-MM", 77.8, 65.9, 62.2, 65.5, False),
    #     ("Biomedical", "VQA-RAD", 81.9, 71.4, 76.0, 72.2, False),
    #     ("Biomedical", "MicroVQA", 68.8, 63.5, 61.4, 61.5, False),
    # ]
    #
    # normalized_scores = []
    # for cat, bench, g3p, g25p, c45, gpt, lib in data:
    #     others = [g25p, c45, gpt]
    #     if lib:
    #         sota = min(others)
    #         norm_score = sota / g3p
    #     else:
    #         sota = max(others)
    #         norm_score = g3p / sota
    #     normalized_scores.append((cat, norm_score))
    #
    # categories = {}
    # for cat, score in normalized_scores:
    #     if cat not in categories:
    #         categories[cat] = []
    #     categories[cat].append(score)
    #
    # avg_per_category = {cat: np.mean(scores) for cat, scores in categories.items()}
    #
    # # Plotting
    # cats = list(avg_per_category.keys())
    # values = [avg_per_category[c] for c in cats]
    #
    # # Sort categories for better visualization if needed, or keep order from data
    # plt.figure(figsize=(10, 6))
    # plt.style.use('ggplot')
    # bars = plt.bar(cats, values, color='skyblue', edgecolor='navy')
    #
    # plt.axhline(y=1.0, color='red', linestyle='--', label='Prior SOTA (1.0)')
    # plt.ylabel('Normalized Performance (SOTA = 1.0)')
    # plt.title('Gemini 3 Pro Performance relative to Prior SOTA (Normalized)', fontsize=14)
    # plt.xticks(rotation=45, ha='right')
    # plt.ylim(0, max(values) * 1.2)
    #
    # for bar in bars:
    #     yval = bar.get_height()
    #     plt.text(bar.get_x() + bar.get_width()/2, yval + 0.02, f'{yval:.2f}x', ha='center', va='bottom')
    #
    # plt.legend()
    # plt.tight_layout()
    # plt.savefig('performance_chart.png')
    # plt.show()
    #
    # print(avg_per_category)
    #
    # ####################### 2. Executing Python Code #######################
    # {'Visual Reasoning': np.float64(1.3065950426525028), 'Document': np.float64(1.1065092453773113), 'Spatial': np.float64(1.3636746436001959), 'Screen': np.float64(1.4856952211773211), 'Video': np.float64(1.0620548283943443), 'Education': np.float64(1.0563204005006257), 'Biomedical': np.float64(1.1138909257119955)}
    #
    # ####################### 3. Save Output #######################
    # Output is saved to output-barplot-1.jpg
    # ####################### 3. Save Output #######################
    # Output is saved to output-barplot-2.jpg
    # Based on the data provided in the table, I have calculated the per-category performance of Gemini 3 Pro normalized against the prior state-of-the-art (SOTA), which is defined as the best performance among Gemini 2.5 Pro, Claude Opus 4.5, and GPT-5.1 for each benchmark.
    #
    # For benchmarks where lower values are better (indicated by an asterisk, e.g., OmniDocBench1.5*), the normalization was calculated as $\text{Prior SOTA} / \text{Gemini 3 Pro Score}$. For all other benchmarks, it was calculated as $\text{Gemini 3 Pro Score} / \text{Prior SOTA}$. The values were then averaged within each category.
    #
    # The resulting bar chart below shows that Gemini 3 Pro outperforms the prior SOTA across all categories, with the most significant gains in **Screen** (1.49x), **Spatial** (1.36x), and **Visual Reasoning** (1.31x) benchmarks.
    #
    # ![Gemini 3 Pro Performance Chart](performance_chart.png)
    # [END googlegenaisdk_codeexecution_barplot_with_txt_img]
    return True


if __name__ == "__main__":
    generate_content()
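
The commit message notes that CI was updated to run these tests only on Python 3.12 and that Pillow was added to requirements.txt. A minimal pytest-style check for the sample above could look like the sketch below; the module name is hypothetical, since the actual filenames are not visible in this rendering of the diff.

# Hypothetical test module; adjust the import to the actual sample filename.
import codeexecution_barplot_with_txt_img as barplot_sample


def test_codeexecution_barplot() -> None:
    # The sample returns True after printing the generated code, the execution
    # output, and saving the chart image(s) locally.
    assert barplot_sample.generate_content() is True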
