From be869caaecbcfeae196a20c4916404bd5c2ef2a1 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer
Date: Tue, 17 Mar 2026 12:50:35 -0700
Subject: [PATCH] fix: GenAI Client(evals): Drop empty columns in evaluation dataset

Cells holding zero-length array-like values are treated as empty, and
columns are filtered by position so duplicate column labels are supported.

PiperOrigin-RevId: 885162955
---
 tests/unit/vertexai/genai/test_evals.py | 121 ++++++++++++++++++++++++
 vertexai/_genai/_evals_common.py        |  50 ++++++++--
 2 files changed, 165 insertions(+), 6 deletions(-)

diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index f16e9ce8d8..2b534d4740 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -53,6 +53,52 @@ pytestmark = pytest.mark.usefixtures("google_auth_mock")
 
 
 
+class TestDropEmptyColumns:
+    """Unit tests for the _drop_empty_columns function."""
+
+    def test_drop_empty_columns(self):
+        df = pd.DataFrame(
+            {
+                "col1": [1, 2, 3],
+                "col2": [None, None, None],
+                "col3": [[], [], []],
+                "col4": [{}, {}, {}],
+                "col5": [1, None, []],
+            }
+        )
+        result_df = _evals_common._drop_empty_columns(df)
+        assert "col1" in result_df.columns
+        assert "col2" not in result_df.columns
+        assert "col3" not in result_df.columns
+        assert "col4" not in result_df.columns
+        assert "col5" in result_df.columns
+
+    def test_drop_empty_columns_all_empty(self):
+        df = pd.DataFrame(
+            {
+                "col1": [None, None, None],
+                "col2": [[], [], []],
+            }
+        )
+        result_df = _evals_common._drop_empty_columns(df)
+        assert result_df.empty
+
+    def test_drop_empty_columns_none_empty(self):
+        df = pd.DataFrame(
+            {
+                "col1": [1, 2, 3],
+                "col2": ["a", "b", "c"],
+            }
+        )
+        result_df = _evals_common._drop_empty_columns(df)
+        assert list(result_df.columns) == ["col1", "col2"]
+
+    def test_drop_empty_columns_duplicate_labels(self):
+        df = pd.DataFrame([[1, None], [2, None]], columns=["col1", "col1"])
+        result_df = _evals_common._drop_empty_columns(df)
+        assert result_df.shape == (2, 1)
+
+
 def _create_content_dump(text: str) -> dict[str, list[genai_types.Content]]:
     return {
         "contents": [
@@ -389,6 +435,45 @@ def setup_method(self):
         )
         self.client = vertexai.Client(project=_TEST_PROJECT, location=_TEST_LOCATION)
 
+    @mock.patch.object(_evals_common, "Models")
+    @mock.patch.object(_evals_utils, "EvalDatasetLoader")
+    def test_inference_drops_empty_columns(self, mock_eval_dataset_loader, mock_models):
+        mock_df = pd.DataFrame(
+            {
+                "prompt": ["test prompt 1", "test prompt 2"],
+                "empty_col": [None, None],
+                "empty_list_col": [[], []],
+            }
+        )
+        mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict(
+            orient="records"
+        )
+
+        mock_generate_content_response = genai_types.GenerateContentResponse(
+            candidates=[
+                genai_types.Candidate(
+                    content=genai_types.Content(
+                        parts=[genai_types.Part(text="test response")]
+                    ),
+                    finish_reason=genai_types.FinishReason.STOP,
+                )
+            ],
+            prompt_feedback=None,
+        )
+        mock_models.return_value.generate_content.return_value = (
+            mock_generate_content_response
+        )
+
+        inference_result = self.client.evals.run_inference(
+            model="gemini-pro",
+            src=mock_df,
+        )
+
+        assert "empty_col" not in inference_result.eval_dataset_df.columns
+        assert "empty_list_col" not in inference_result.eval_dataset_df.columns
+        assert "prompt" in inference_result.eval_dataset_df.columns
+        assert "response" in inference_result.eval_dataset_df.columns
+
     @mock.patch.object(_evals_common, "Models")
     @mock.patch.object(_evals_utils, "EvalDatasetLoader")
     def test_inference_with_string_model_success(
@@ -5995,6 +6080,42 @@ async def test_async_generate_user_scenarios(self):
         self.mock_api_client.async_request.assert_called_once()
 
 
+class TestTransformDataframe:
+    """Unit tests for the _transform_dataframe function."""
+
+    def test_transform_dataframe_drops_empty_columns(self):
+        rows = [
+            {
+                "prompt": "test prompt",
+                "reference": None,
+                "intermediate_events": [],
+                "agent_data": None,
+                "candidate1": "test response",
+            }
+        ]
+        eval_dfs = _evals_common._transform_dataframe(rows)
+        assert len(eval_dfs) == 1
+        df = eval_dfs[0].eval_dataset_df
+        assert "prompt" in df.columns
+        assert "response" in df.columns
+        assert "reference" not in df.columns
+        assert "intermediate_events" not in df.columns
+        assert "agent_data" not in df.columns
+
+    def test_transform_dataframe_drops_empty_prompt_and_response(self):
+        rows = [
+            {
+                "prompt": None,
+                "candidate1": None,
+            }
+        ]
+        eval_dfs = _evals_common._transform_dataframe(rows)
+        assert len(eval_dfs) == 1
+        df = eval_dfs[0].eval_dataset_df
+        assert "prompt" not in df.columns
+        assert "response" not in df.columns
+
+
 class TestConvertRequestToDatasetRow:
     """Unit tests for the _convert_request_to_dataset_row function."""
 
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index b156b629ed..9fe93230eb 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -1139,6 +1139,7 @@ def _execute_inference(
         elif callable(model):
             candidate_name = getattr(model, "__name__", None)
 
+        results_df = _drop_empty_columns(results_df)
         evaluation_dataset = types.EvaluationDataset(
             eval_dataset_df=results_df,
             candidate_name=candidate_name,
@@ -1187,6 +1188,7 @@ def _execute_inference(
         end_time = time.time()
         logger.info("Agent Run completed in %.2f seconds.", end_time - start_time)
 
+        results_df = _drop_empty_columns(results_df)
         evaluation_dataset = types.EvaluationDataset(
             eval_dataset_df=results_df,
             candidate_name=candidate_name,
@@ -2060,6 +2062,39 @@ def _convert_request_to_dataset_row(
     return dict_row
 
 
+def _drop_empty_columns(df: "pd.DataFrame") -> "pd.DataFrame":
+    """Drops columns whose cells are all missing or empty containers.
+
+    A cell counts as empty when it is an empty list or dict, a missing value
+    as reported by `pd.isna` (e.g. None or NaN), or an array-like value with
+    no elements.
+
+    Args:
+      df: The dataframe to filter. May be None.
+
+    Returns:
+      A new dataframe without the all-empty columns. `df` itself is returned
+      when it is None, when it is empty, or when `pd` is None.
+    """
+    if df is None or pd is None or df.empty:
+        return df
+
+    def is_empty(x: Any) -> bool:
+        if isinstance(x, (list, dict)):
+            return not x
+        missing = pd.isna(x)
+        # pd.isna is element-wise for array-like cells (e.g. numpy arrays), so
+        # its result is not a flag there; such a cell is empty only when it has
+        # no elements.
+        if getattr(missing, "ndim", 0) > 0:
+            return bool(missing.size == 0)
+        return bool(missing)
+
+    # Select columns by position so that duplicate labels are handled too.
+    keep = [not column.apply(is_empty).all() for _, column in df.items()]
+    return df.loc[:, keep]
+
+
 def _transform_dataframe(
     rows: list[dict[str, Any]],
 ) -> list[types.EvaluationDataset]:
@@ -2077,13 +2112,16 @@ def _transform_dataframe(
         col for col in df.columns if col not in _evals_constant.COMMON_DATASET_COLUMNS
     ]
 
-    eval_dfs = [
-        types.EvaluationDataset(
-            candidate_name=candidate,
-            eval_dataset_df=df.rename(columns={candidate: _evals_constant.RESPONSE}),
+    eval_dfs = []
+    for candidate in candidates:
+        temp_df = df.rename(columns={candidate: _evals_constant.RESPONSE})
+        temp_df = _drop_empty_columns(temp_df)
+        eval_dfs.append(
+            types.EvaluationDataset(
+                candidate_name=candidate,
+                eval_dataset_df=temp_df,
+            )
         )
-        for candidate in candidates
-    ]
     return eval_dfs
 
 