Skip to content

Commit d4e211d

Browse files
BenjaminKazemi authored and copybara-github committed
feat: GenAI SDK client (Multimodal Dataset) - Create a multimodal dataset from Big Query.
PiperOrigin-RevId: 822772168
1 parent 5aaa60e commit d4e211d

File tree

6 files changed

+771
-18
lines changed

6 files changed

+771
-18
lines changed

tests/unit/vertexai/genai/replays/test_create_multimodal.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727

2828
def test_create_dataset(client):
29-
create_dataset_operation = client.multimodal._create_multimodal_dataset(
29+
create_dataset_operation = client.datasets._create_multimodal_dataset(
3030
name="projects/vertex-sdk-dev/locations/us-central1",
3131
display_name="test-display-name",
3232
metadata_schema_uri=METADATA_SCHEMA_URI,
@@ -50,7 +50,7 @@ def test_create_dataset(client):
5050

5151
@pytest.mark.asyncio
5252
async def test_create_dataset_async(client):
53-
create_dataset_operation = await client.aio.multimodal._create_multimodal_dataset(
53+
create_dataset_operation = await client.aio.datasets._create_multimodal_dataset(
5454
name="projects/vertex-sdk-dev/locations/us-central1",
5555
display_name="test-display-name",
5656
metadata_schema_uri=METADATA_SCHEMA_URI,
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai._genai import types
19+
20+
import pytest
21+
22+
METADATA_SCHEMA_URI = (
23+
"gs://google-cloud-aiplatform/schema/dataset/metadata/multimodal_1.0.0.yaml"
24+
)
25+
BIGQUERY_TABLE_NAME = "vertex-sdk-dev.multimodal_dataset.test-table"
26+
27+
28+
def test_create_dataset(client):
    """Create a multimodal dataset through the private datasets call.

    The request points the dataset's input config at the BigQuery table
    named by ``BIGQUERY_TABLE_NAME`` and the result must be a truthy
    ``types.MultimodalDatasetOperation``.
    """
    bigquery_source = {"uri": f"bq://{BIGQUERY_TABLE_NAME}"}
    dataset_metadata = {"inputConfig": {"bigquerySource": bigquery_source}}

    operation = client.datasets._create_multimodal_dataset(
        name="projects/vertex-sdk-dev/locations/us-central1",
        display_name="test-display-name",
        metadata_schema_uri=METADATA_SCHEMA_URI,
        metadata=dataset_metadata,
    )

    assert isinstance(operation, types.MultimodalDatasetOperation)
    assert operation
42+
43+
def test_create_dataset_from_bigquery(client):
    """Create a multimodal dataset from a BigQuery table via the public call.

    The returned object must be a ``types.MultimodalDataset`` carrying the
    display name that was requested.
    """
    requested = types.MultimodalDataset(
        display_name="test-from-bigquery",
        bigquery_uri=BIGQUERY_TABLE_NAME,
    )

    created = client.datasets.create_multimodal_dataset_from_bigquery(
        multimodal_dataset=requested,
    )

    assert isinstance(created, types.MultimodalDataset)
    assert created.display_name == "test-from-bigquery"
52+
53+
54+
pytestmark = pytest_helper.setup(
55+
file=__file__,
56+
globals_for_file=globals(),
57+
)
58+
59+
pytest_plugins = ("pytest_asyncio",)
60+
61+
62+
@pytest.mark.asyncio
async def test_create_dataset_async(client):
    """Create a multimodal dataset through the async private datasets call.

    Uses ``client.aio.datasets`` with a BigQuery-backed input config and
    expects a truthy ``types.MultimodalDatasetOperation``.
    """
    bigquery_source = {"uri": f"bq://{BIGQUERY_TABLE_NAME}"}
    dataset_metadata = {"inputConfig": {"bigquerySource": bigquery_source}}

    operation = await client.aio.datasets._create_multimodal_dataset(
        name="projects/vertex-sdk-dev/locations/us-central1",
        display_name="test-display-name",
        metadata_schema_uri=METADATA_SCHEMA_URI,
        metadata=dataset_metadata,
    )

    assert isinstance(operation, types.MultimodalDatasetOperation)
    assert operation
76+
77+
78+
@pytest.mark.asyncio
async def test_create_dataset_from_bigquery_async(client):
    """Create a multimodal dataset from BigQuery via the async public call.

    The awaited result must be a ``types.MultimodalDataset`` carrying the
    display name that was requested.
    """
    requested = types.MultimodalDataset(
        display_name="test-from-bigquery",
        bigquery_uri=BIGQUERY_TABLE_NAME,
    )

    created = await client.aio.datasets.create_multimodal_dataset_from_bigquery(
        multimodal_dataset=requested,
    )

    assert isinstance(created, types.MultimodalDataset)
    assert created.display_name == "test-from-bigquery"
88+
89+
90+
@pytest.mark.asyncio
async def test_create_dataset_from_bigquery_async_with_timeout(client):
    """Create a multimodal dataset from BigQuery asynchronously with a config.

    Passes a ``types.CreateMultimodalDatasetConfig`` with ``timeout=120``
    alongside the dataset and expects a ``types.MultimodalDataset`` with the
    requested display name.
    """
    create_config = types.CreateMultimodalDatasetConfig(timeout=120)
    requested = types.MultimodalDataset(
        display_name="test-from-bigquery",
        bigquery_uri=BIGQUERY_TABLE_NAME,
    )

    created = await client.aio.datasets.create_multimodal_dataset_from_bigquery(
        config=create_config,
        multimodal_dataset=requested,
    )

    assert isinstance(created, types.MultimodalDataset)
    assert created.display_name == "test-from-bigquery"

vertexai/_genai/_datasets_utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
"""Utility functions for multimodal dataset."""
16+
17+
18+
METADATA_SCHEMA_URI = (
19+
"gs://google-cloud-aiplatform/schema/dataset/metadata/multimodal_1.0.0.yaml"
20+
)

vertexai/_genai/client.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def __init__(self, api_client: genai_client.Client):
5555
self._agent_engines = None
5656
self._prompt_optimizer = None
5757
self._prompts = None
58-
self._multimodal = None
58+
self._datasets = None
5959

6060
@property
6161
@_common.experimental_warning(
@@ -121,16 +121,16 @@ def prompts(self):
121121

122122
@property
123123
@_common.experimental_warning(
124-
"The Vertex SDK GenAI async multimodal module is experimental, "
124+
"The Vertex SDK GenAI async datasets module is experimental, "
125125
"and may change in future versions."
126126
)
127-
def multimodal(self):
128-
if self._multimodal is None:
129-
self._multimodal = importlib.import_module(
130-
".multimodal",
127+
def datasets(self):
128+
if self._datasets is None:
129+
self._datasets = importlib.import_module(
130+
".datasets",
131131
__package__,
132132
)
133-
return self._multimodal.AsyncMultimodal(self._api_client)
133+
return self._datasets.AsyncDatasets(self._api_client)
134134

135135

136136
class Client:
@@ -192,7 +192,7 @@ def __init__(
192192
self._prompt_optimizer = None
193193
self._agent_engines = None
194194
self._prompts = None
195-
self._multimodal = None
195+
self._datasets = None
196196

197197
@property
198198
def evals(self) -> Any:
@@ -282,13 +282,13 @@ def prompts(self):
282282

283283
@property
284284
@_common.experimental_warning(
285-
"The Vertex SDK GenAI multimodal module is experimental, "
285+
"The Vertex SDK GenAI datasets module is experimental, "
286286
"and may change in future versions."
287287
)
288-
def multimodal(self):
289-
if self._multimodal is None:
290-
self._multimodal = importlib.import_module(
291-
".multimodal",
288+
def datasets(self):
289+
if self._datasets is None:
290+
self._datasets = importlib.import_module(
291+
".datasets",
292292
__package__,
293293
)
294-
return self._multimodal.Multimodal(self._api_client)
294+
return self._datasets.Datasets(self._api_client)

0 commit comments

Comments
 (0)