Skip to content

Commit 2bb12bb

Browse files
cleop-google and copybara-github
authored and committed
chore: GenAI SDK client(multimodal) - Move to_bigframes method to MultimodalDataset class.
BREAKING CHANGE: `to_bigframes` has been removed from the datasets module and moved into the `MultimodalDataset` class.

PiperOrigin-RevId: 889731146
1 parent e164b19 commit 2bb12bb

4 files changed

Lines changed: 48 additions & 100 deletions

File tree

tests/unit/vertexai/genai/replays/test_get_multimodal_datasets.py

Lines changed: 0 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -15,34 +15,14 @@
1515
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
1616

1717
from tests.unit.vertexai.genai.replays import pytest_helper
18-
from vertexai._genai import _datasets_utils
1918
from vertexai._genai import types
2019

21-
from unittest import mock
2220
import pytest
2321

2422
BIGQUERY_TABLE_NAME = "vertex-sdk-dev.multimodal_dataset.test-table"
2523
DATASET = "8810841321427173376"
2624

2725

28-
@pytest.fixture
29-
def mock_import_bigframes(is_replay_mode):
30-
if is_replay_mode:
31-
with mock.patch.object(
32-
_datasets_utils, "_try_import_bigframes"
33-
) as mock_import_bigframes:
34-
mock_read_gbq_table_result = mock.MagicMock()
35-
mock_read_gbq_table_result.sql = f"SLECT * FROM `{BIGQUERY_TABLE_NAME}`"
36-
37-
bigframes = mock.MagicMock()
38-
bigframes.pandas.read_gbq_table.return_value = mock_read_gbq_table_result
39-
40-
mock_import_bigframes.return_value = bigframes
41-
yield mock_import_bigframes
42-
else:
43-
yield None
44-
45-
4626
def test_get_dataset(client):
4727
dataset = client.datasets._get_multimodal_dataset(
4828
name=DATASET,
@@ -61,15 +41,6 @@ def test_get_dataset_from_public_method(client):
6141
assert dataset.display_name == "test-display-name"
6242

6343

64-
@pytest.mark.usefixtures("mock_import_bigframes")
65-
def test_to_bigframes(client):
66-
dataset = client.datasets.get_multimodal_dataset(
67-
name=DATASET,
68-
)
69-
df = client.datasets.to_bigframes(multimodal_dataset=dataset)
70-
assert BIGQUERY_TABLE_NAME in df.sql
71-
72-
7344
pytestmark = pytest_helper.setup(
7445
file=__file__,
7546
globals_for_file=globals(),
@@ -96,13 +67,3 @@ async def test_get_dataset_from_public_method_async(client):
9667
assert isinstance(dataset, types.MultimodalDataset)
9768
assert dataset.name.endswith(DATASET)
9869
assert dataset.display_name == "test-display-name"
99-
100-
101-
@pytest.mark.asyncio
102-
@pytest.mark.usefixtures("mock_import_bigframes")
103-
async def test_to_bigframes_async(client):
104-
dataset = await client.aio.datasets.get_multimodal_dataset(
105-
name=DATASET,
106-
)
107-
df = await client.aio.datasets.to_bigframes(multimodal_dataset=dataset)
108-
assert BIGQUERY_TABLE_NAME in df.sql

tests/unit/vertexai/genai/test_multimodal_datasets_genai.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,26 @@
1313
# limitations under the License.
1414
#
1515
"""Tests for multimodal datasets."""
16+
from unittest import mock
1617

18+
from vertexai._genai import _datasets_utils
1719
from vertexai._genai import types
20+
import pytest
21+
22+
23+
@pytest.fixture
24+
def mock_import_bigframes():
25+
with mock.patch.object(
26+
_datasets_utils, "_try_import_bigframes"
27+
) as mock_import_bigframes:
28+
mock_read_gbq_table_result = mock.MagicMock()
29+
mock_read_gbq_table_result.sql = "SELECT * FROM `project.dataset.table`"
30+
31+
bigframes = mock.MagicMock()
32+
bigframes.pandas.read_gbq_table.return_value = mock_read_gbq_table_result
33+
34+
mock_import_bigframes.return_value = bigframes
35+
yield mock_import_bigframes
1836

1937

2038
class TestMultimodalDataset:
@@ -126,3 +144,14 @@ def test_set_bigquery_uri_preserves_other_fields(self):
126144
dataset.metadata.gemini_request_read_config.assembled_request_column_name
127145
== "test_column"
128146
)
147+
148+
def test_to_bigframes(self, mock_import_bigframes):
149+
dataset = types.MultimodalDataset()
150+
dataset.set_bigquery_uri("bq://project.dataset.table")
151+
152+
df = dataset.to_bigframes()
153+
154+
assert "project.dataset.table" in df.sql
155+
mock_import_bigframes.return_value.pandas.read_gbq_table.assert_called_once_with(
156+
"project.dataset.table"
157+
)

vertexai/_genai/datasets.py

Lines changed: 0 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -970,36 +970,6 @@ def create_from_bigframes(
970970
config=config,
971971
)
972972

973-
def to_bigframes(
974-
self,
975-
*,
976-
multimodal_dataset: types.MultimodalDatasetOrDict,
977-
) -> "bigframes.pandas.DataFrame": # type: ignore # noqa: F821
978-
"""Converts a multimodal dataset to a BigFrames dataframe.
979-
980-
This is the preferred method to inspect the multimodal dataset in a
981-
notebook.
982-
983-
Args:
984-
multimodal_dataset:
985-
Required. A representation of a multimodal dataset.
986-
987-
Returns:
988-
A BigFrames dataframe.
989-
"""
990-
bigframes = _datasets_utils._try_import_bigframes()
991-
992-
if isinstance(multimodal_dataset, dict):
993-
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
994-
elif not multimodal_dataset:
995-
multimodal_dataset = types.MultimodalDataset()
996-
997-
if multimodal_dataset.bigquery_uri is None:
998-
raise ValueError("Multimodal dataset bigquery source uri is not set.")
999-
return bigframes.pandas.read_gbq_table(
1000-
multimodal_dataset.bigquery_uri.removeprefix("bq://")
1001-
)
1002-
1003973
def update_multimodal_dataset(
1004974
self,
1005975
*,
@@ -2115,37 +2085,6 @@ async def create_from_bigframes(
21152085
config=config,
21162086
)
21172087

2118-
async def to_bigframes(
2119-
self,
2120-
*,
2121-
multimodal_dataset: types.MultimodalDatasetOrDict,
2122-
) -> "bigframes.pandas.DataFrame": # type: ignore # noqa: F821
2123-
"""Converts a multimodal dataset to a BigFrames dataframe.
2124-
2125-
This is the preferred method to inspect the multimodal dataset in a
2126-
notebook.
2127-
2128-
Args:
2129-
multimodal_dataset:
2130-
Required. A representation of a multimodal dataset.
2131-
2132-
Returns:
2133-
A BigFrames dataframe.
2134-
"""
2135-
bigframes = _datasets_utils._try_import_bigframes()
2136-
2137-
if isinstance(multimodal_dataset, dict):
2138-
multimodal_dataset = types.MultimodalDataset(**multimodal_dataset)
2139-
elif not multimodal_dataset:
2140-
multimodal_dataset = types.MultimodalDataset()
2141-
2142-
if multimodal_dataset.bigquery_uri is None:
2143-
raise ValueError("Multimodal dataset bigquery source uri is missing.")
2144-
return await asyncio.to_thread(
2145-
bigframes.pandas.read_gbq_table,
2146-
multimodal_dataset.bigquery_uri.removeprefix("bq://"),
2147-
)
2148-
21492088
async def update_multimodal_dataset(
21502089
self,
21512090
*,

vertexai/_genai/types/common.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12424,6 +12424,25 @@ def set_bigquery_uri(
1242412424
metadata.input_config = input_config
1242512425
self.metadata = metadata
1242612426

12427+
def to_bigframes(
12428+
self,
12429+
) -> "bigframes.pandas.DataFrame": # type: ignore # noqa: F821
12430+
"""Converts the multimodal dataset to a BigFrames dataframe.
12431+
12432+
This is the preferred method to inspect the multimodal dataset in a
12433+
notebook.
12434+
12435+
Returns:
12436+
A BigFrames dataframe.
12437+
"""
12438+
from .. import _datasets_utils
12439+
12440+
bigframes = _datasets_utils._try_import_bigframes()
12441+
12442+
if self.bigquery_uri is None:
12443+
raise ValueError("Multimodal dataset bigquery source uri is not set.")
12444+
return bigframes.pandas.read_gbq_table(self.bigquery_uri.removeprefix("bq://"))
12445+
1242712446

1242812447
class MultimodalDatasetDict(TypedDict, total=False):
1242912448
"""Represents a multimodal dataset."""

0 commit comments

Comments
 (0)