Skip to content

Commit 5dd51a2

Browse files
fthoele and copybara-github
authored and committed
fix: Remove unnecessary pandas import from multimodal datasets preview module.
PiperOrigin-RevId: 823383334
1 parent 57d2709 commit 5dd51a2

File tree

2 files changed

+13
-10
lines changed

2 files changed

+13
-10
lines changed

google/cloud/aiplatform/preview/datasets.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@
1616
#
1717

1818
import dataclasses
19+
import io
20+
import json
1921
from typing import Dict, List, Optional, Tuple
2022
import uuid
2123

@@ -32,7 +34,6 @@
3234
from vertexai import generative_models
3335
from vertexai.generative_models import _generative_models
3436
from vertexai.preview import prompts
35-
import pandas
3637

3738
from google.protobuf import field_mask_pb2
3839
from google.protobuf import struct_pb2
@@ -758,7 +759,7 @@ def from_bigquery(
758759
def from_pandas(
759760
cls,
760761
*,
761-
dataframe: pandas.DataFrame,
762+
dataframe: "pandas.DataFrame", # type: ignore # noqa: F821
762763
target_table_id: Optional[str] = None,
763764
display_name: Optional[str] = None,
764765
project: Optional[str] = None,
@@ -1077,15 +1078,15 @@ def from_gemini_request_jsonl(
10771078

10781079
jsonl_string = blob.download_as_text()
10791080
lines = [line.strip() for line in jsonl_string.splitlines() if line.strip()]
1080-
df = pandas.DataFrame(lines, columns=[request_column_name])
1081+
json_string = json.dumps({request_column_name: lines})
10811082

10821083
session_options = bigframes.BigQueryOptions(
10831084
credentials=credentials,
10841085
project=project,
10851086
location=location,
10861087
)
10871088
with bigframes.connect(session_options) as session:
1088-
temp_bigframes_df = session.read_pandas(df)
1089+
temp_bigframes_df = session.read_json(io.StringIO(json_string))
10891090
temp_bigframes_df[request_column_name] = bigframes.bigquery.parse_json(
10901091
temp_bigframes_df[request_column_name]
10911092
)

tests/unit/aiplatform/test_multimodal_datasets.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -584,9 +584,10 @@ def test_create_dataset_from_gemini_request_jsonl(
584584
mock_bucket.blob.assert_called_once_with("test-file.jsonl")
585585
mock_blob.download_as_text.assert_called_once()
586586

587-
pandas.testing.assert_frame_equal(
588-
session_mock.read_pandas.call_args[0][0],
589-
pandas.DataFrame({"requests": ["json_line_1", "json_line_2"]}),
587+
session_mock.read_json.assert_called_once()
588+
assert (
589+
session_mock.read_json.call_args[0][0].getvalue()
590+
== '{"requests": ["json_line_1", "json_line_2"]}'
590591
)
591592
bq_client_mock.return_value.copy_table.assert_called_once_with(
592593
sources=mock.ANY,
@@ -636,9 +637,10 @@ def test_create_dataset_from_gemini_request_jsonl_without_target_table_id(
636637
mock_bucket.blob.assert_called_once_with("test-file.jsonl")
637638
mock_blob.download_as_text.assert_called_once()
638639

639-
pandas.testing.assert_frame_equal(
640-
session_mock.read_pandas.call_args[0][0],
641-
pandas.DataFrame({"requests": ["json_line_1", "json_line_2"]}),
640+
session_mock.read_json.assert_called_once()
641+
assert (
642+
session_mock.read_json.call_args[0][0].getvalue()
643+
== '{"requests": ["json_line_1", "json_line_2"]}'
642644
)
643645

644646
# Assert that the default BQ dataset is created

0 commit comments

Comments
 (0)