Skip to content

Commit be869ca

Browse files
vertex-sdk-bot authored and copybara-github committed
fix: GenAI Client(evals): Drop empty columns in evaluation dataset
PiperOrigin-RevId: 885162955
1 parent 0e0137e commit be869ca

File tree

2 files changed

+141
-6
lines changed

2 files changed

+141
-6
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,47 @@
5353
pytestmark = pytest.mark.usefixtures("google_auth_mock")
5454

5555

56+
class TestDropEmptyColumns:
    """Unit tests for the _drop_empty_columns function."""

    def test_drop_empty_columns(self):
        # Mixed frame: col5 holds one real value, so it must survive the prune.
        input_df = pd.DataFrame(
            {
                "col1": [1, 2, 3],
                "col2": [None, None, None],
                "col3": [[], [], []],
                "col4": [{}, {}, {}],
                "col5": [1, None, []],
            }
        )
        cleaned = _evals_common._drop_empty_columns(input_df)
        remaining = set(cleaned.columns)
        assert "col1" in remaining
        assert "col5" in remaining
        assert remaining.isdisjoint({"col2", "col3", "col4"})

    def test_drop_empty_columns_all_empty(self):
        # Every column is empty, so the resulting frame has no data at all.
        input_df = pd.DataFrame(
            {
                "col1": [None, None, None],
                "col2": [[], [], []],
            }
        )
        cleaned = _evals_common._drop_empty_columns(input_df)
        assert cleaned.empty

    def test_drop_empty_columns_none_empty(self):
        # No column qualifies for dropping; order is preserved.
        input_df = pd.DataFrame(
            {
                "col1": [1, 2, 3],
                "col2": ["a", "b", "c"],
            }
        )
        cleaned = _evals_common._drop_empty_columns(input_df)
        assert list(cleaned.columns) == ["col1", "col2"]
96+
5697
def _create_content_dump(text: str) -> dict[str, list[genai_types.Content]]:
5798
return {
5899
"contents": [
@@ -389,6 +430,45 @@ def setup_method(self):
389430
)
390431
self.client = vertexai.Client(project=_TEST_PROJECT, location=_TEST_LOCATION)
391432

433+
@mock.patch.object(_evals_common, "Models")
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
def test_inference_drops_empty_columns(self, mock_eval_dataset_loader, mock_models):
    """run_inference must not carry all-empty source columns into its output."""
    source_df = pd.DataFrame(
        {
            "prompt": ["test prompt 1", "test prompt 2"],
            "empty_col": [None, None],
            "empty_list_col": [[], []],
        }
    )
    # The loader hands the dataset back as a list of row dicts.
    mock_eval_dataset_loader.return_value.load.return_value = source_df.to_dict(
        orient="records"
    )

    fake_response = genai_types.GenerateContentResponse(
        candidates=[
            genai_types.Candidate(
                content=genai_types.Content(
                    parts=[genai_types.Part(text="test response")]
                ),
                finish_reason=genai_types.FinishReason.STOP,
            )
        ],
        prompt_feedback=None,
    )
    mock_models.return_value.generate_content.return_value = fake_response

    inference_result = self.client.evals.run_inference(
        model="gemini-pro",
        src=source_df,
    )

    result_columns = inference_result.eval_dataset_df.columns
    assert "empty_col" not in result_columns
    assert "empty_list_col" not in result_columns
    assert "prompt" in result_columns
    assert "response" in result_columns
392472
@mock.patch.object(_evals_common, "Models")
393473
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
394474
def test_inference_with_string_model_success(
@@ -5995,6 +6075,42 @@ async def test_async_generate_user_scenarios(self):
59956075
self.mock_api_client.async_request.assert_called_once()
59966076

59976077

6078+
class TestTransformDataframe:
    """Unit tests for the _transform_dataframe function."""

    def test_transform_dataframe_drops_empty_columns(self):
        # One candidate column -> one EvaluationDataset, with empty columns pruned
        # and the candidate column renamed to "response".
        rows = [
            {
                "prompt": "test prompt",
                "reference": None,
                "intermediate_events": [],
                "agent_data": None,
                "candidate1": "test response",
            }
        ]
        datasets = _evals_common._transform_dataframe(rows)
        assert len(datasets) == 1
        frame = datasets[0].eval_dataset_df
        present = set(frame.columns)
        assert "prompt" in present
        assert "response" in present
        assert present.isdisjoint({"reference", "intermediate_events", "agent_data"})

    def test_transform_dataframe_drops_empty_prompt_and_response(self):
        # Even the prompt/response columns are dropped when fully empty.
        datasets = _evals_common._transform_dataframe(
            [{"prompt": None, "candidate1": None}]
        )
        assert len(datasets) == 1
        frame = datasets[0].eval_dataset_df
        assert "prompt" not in frame.columns
        assert "response" not in frame.columns
6112+
6113+
59986114
class TestConvertRequestToDatasetRow:
59996115
"""Unit tests for the _convert_request_to_dataset_row function."""
60006116

vertexai/_genai/_evals_common.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,6 +1139,7 @@ def _execute_inference(
11391139
elif callable(model):
11401140
candidate_name = getattr(model, "__name__", None)
11411141

1142+
results_df = _drop_empty_columns(results_df)
11421143
evaluation_dataset = types.EvaluationDataset(
11431144
eval_dataset_df=results_df,
11441145
candidate_name=candidate_name,
@@ -1187,6 +1188,7 @@ def _execute_inference(
11871188
end_time = time.time()
11881189
logger.info("Agent Run completed in %.2f seconds.", end_time - start_time)
11891190

1191+
results_df = _drop_empty_columns(results_df)
11901192
evaluation_dataset = types.EvaluationDataset(
11911193
eval_dataset_df=results_df,
11921194
candidate_name=candidate_name,
@@ -2060,6 +2062,20 @@ def _convert_request_to_dataset_row(
20602062
return dict_row
20612063

20622064

2065+
def _drop_empty_columns(df: "pd.DataFrame") -> "pd.DataFrame":
2066+
"""Drops columns that are all None or all empty lists/dicts."""
2067+
if df is None or df.empty or pd is None:
2068+
return df
2069+
2070+
def is_empty(x: Any) -> bool:
2071+
if isinstance(x, (list, dict)):
2072+
return not x
2073+
return pd.isna(x) # type: ignore[no-any-return]
2074+
2075+
cols_to_drop = [col for col in df.columns if df[col].apply(is_empty).all()]
2076+
return df.drop(columns=cols_to_drop)
2077+
2078+
20632079
def _transform_dataframe(
20642080
rows: list[dict[str, Any]],
20652081
) -> list[types.EvaluationDataset]:
@@ -2077,13 +2093,16 @@ def _transform_dataframe(
20772093
col for col in df.columns if col not in _evals_constant.COMMON_DATASET_COLUMNS
20782094
]
20792095

2080-
eval_dfs = [
2081-
types.EvaluationDataset(
2082-
candidate_name=candidate,
2083-
eval_dataset_df=df.rename(columns={candidate: _evals_constant.RESPONSE}),
2096+
eval_dfs = []
2097+
for candidate in candidates:
2098+
temp_df = df.rename(columns={candidate: _evals_constant.RESPONSE})
2099+
temp_df = _drop_empty_columns(temp_df)
2100+
eval_dfs.append(
2101+
types.EvaluationDataset(
2102+
candidate_name=candidate,
2103+
eval_dataset_df=temp_df,
2104+
)
20842105
)
2085-
for candidate in candidates
2086-
]
20872106
return eval_dfs
20882107

20892108

0 commit comments

Comments
 (0)