116 changes: 116 additions & 0 deletions tests/unit/vertexai/genai/test_evals.py
@@ -53,6 +53,47 @@
pytestmark = pytest.mark.usefixtures("google_auth_mock")


class TestDropEmptyColumns:
"""Unit tests for the _drop_empty_columns function."""

def test_drop_empty_columns(self):
df = pd.DataFrame(
{
"col1": [1, 2, 3],
"col2": [None, None, None],
"col3": [[], [], []],
"col4": [{}, {}, {}],
"col5": [1, None, []],
}
)
result_df = _evals_common._drop_empty_columns(df)
assert "col1" in result_df.columns
assert "col2" not in result_df.columns
assert "col3" not in result_df.columns
assert "col4" not in result_df.columns
assert "col5" in result_df.columns

def test_drop_empty_columns_all_empty(self):
df = pd.DataFrame(
{
"col1": [None, None, None],
"col2": [[], [], []],
}
)
result_df = _evals_common._drop_empty_columns(df)
assert result_df.empty

def test_drop_empty_columns_none_empty(self):
df = pd.DataFrame(
{
"col1": [1, 2, 3],
"col2": ["a", "b", "c"],
}
)
result_df = _evals_common._drop_empty_columns(df)
assert list(result_df.columns) == ["col1", "col2"]


def _create_content_dump(text: str) -> dict[str, list[genai_types.Content]]:
return {
"contents": [
@@ -389,6 +430,45 @@ def setup_method(self):
)
self.client = vertexai.Client(project=_TEST_PROJECT, location=_TEST_LOCATION)

@mock.patch.object(_evals_common, "Models")
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
def test_inference_drops_empty_columns(self, mock_eval_dataset_loader, mock_models):
mock_df = pd.DataFrame(
{
"prompt": ["test prompt 1", "test prompt 2"],
"empty_col": [None, None],
"empty_list_col": [[], []],
}
)
mock_eval_dataset_loader.return_value.load.return_value = mock_df.to_dict(
orient="records"
)

mock_generate_content_response = genai_types.GenerateContentResponse(
candidates=[
genai_types.Candidate(
content=genai_types.Content(
parts=[genai_types.Part(text="test response")]
),
finish_reason=genai_types.FinishReason.STOP,
)
],
prompt_feedback=None,
)
mock_models.return_value.generate_content.return_value = (
mock_generate_content_response
)

inference_result = self.client.evals.run_inference(
model="gemini-pro",
src=mock_df,
)

assert "empty_col" not in inference_result.eval_dataset_df.columns
assert "empty_list_col" not in inference_result.eval_dataset_df.columns
assert "prompt" in inference_result.eval_dataset_df.columns
assert "response" in inference_result.eval_dataset_df.columns

@mock.patch.object(_evals_common, "Models")
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
def test_inference_with_string_model_success(
@@ -5995,6 +6075,42 @@ async def test_async_generate_user_scenarios(self):
self.mock_api_client.async_request.assert_called_once()


class TestTransformDataframe:
"""Unit tests for the _transform_dataframe function."""

def test_transform_dataframe_drops_empty_columns(self):
rows = [
{
"prompt": "test prompt",
"reference": None,
"intermediate_events": [],
"agent_data": None,
"candidate1": "test response",
}
]
eval_dfs = _evals_common._transform_dataframe(rows)
assert len(eval_dfs) == 1
df = eval_dfs[0].eval_dataset_df
assert "prompt" in df.columns
assert "response" in df.columns
assert "reference" not in df.columns
assert "intermediate_events" not in df.columns
assert "agent_data" not in df.columns

def test_transform_dataframe_drops_empty_prompt_and_response(self):
rows = [
{
"prompt": None,
"candidate1": None,
}
]
eval_dfs = _evals_common._transform_dataframe(rows)
assert len(eval_dfs) == 1
df = eval_dfs[0].eval_dataset_df
assert "prompt" not in df.columns
assert "response" not in df.columns


class TestConvertRequestToDatasetRow:
"""Unit tests for the _convert_request_to_dataset_row function."""

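For readers who want to try the emptiness rule these tests encode without installing the SDK, here is a minimal pandas-only sketch of the same logic. drop_empty_columns_sketch is a hypothetical stand-in for the private _drop_empty_columns helper exercised above (added to vertexai/_genai/_evals_common.py in the next file), not the library code itself; only pandas is assumed.

# Minimal, self-contained sketch of the emptiness rule pinned down by
# TestDropEmptyColumns. Mirrors, but is not, the library helper.
from typing import Any

import pandas as pd


def drop_empty_columns_sketch(df: pd.DataFrame) -> pd.DataFrame:
    """Drop columns whose every value is None/NaN or an empty list/dict."""

    def is_empty(value: Any) -> bool:
        if isinstance(value, (list, dict)):
            return not value  # empty containers count as empty
        return bool(pd.isna(value))  # None/NaN count as empty

    to_drop = [col for col in df.columns if df[col].apply(is_empty).all()]
    return df.drop(columns=to_drop)


df = pd.DataFrame(
    {
        "col1": [1, None, []],       # mixed: kept, one value is non-empty
        "col2": [None, None, None],  # all None: dropped
        "col3": [[], [], []],        # all empty lists: dropped
    }
)
print(list(drop_empty_columns_sketch(df).columns))  # ['col1']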
31 changes: 25 additions & 6 deletions vertexai/_genai/_evals_common.py
@@ -1139,6 +1139,7 @@ def _execute_inference(
elif callable(model):
candidate_name = getattr(model, "__name__", None)

results_df = _drop_empty_columns(results_df)
evaluation_dataset = types.EvaluationDataset(
eval_dataset_df=results_df,
candidate_name=candidate_name,
@@ -1187,6 +1188,7 @@ def _execute_inference(
end_time = time.time()
logger.info("Agent Run completed in %.2f seconds.", end_time - start_time)

results_df = _drop_empty_columns(results_df)
evaluation_dataset = types.EvaluationDataset(
eval_dataset_df=results_df,
candidate_name=candidate_name,
@@ -2060,6 +2062,20 @@ def _convert_request_to_dataset_row(
return dict_row


def _drop_empty_columns(df: "pd.DataFrame") -> "pd.DataFrame":
"""Drops columns that are all None or all empty lists/dicts."""
if df is None or df.empty or pd is None:
return df

def is_empty(x: Any) -> bool:
if isinstance(x, (list, dict)):
return not x
return pd.isna(x) # type: ignore[no-any-return]

cols_to_drop = [col for col in df.columns if df[col].apply(is_empty).all()]
return df.drop(columns=cols_to_drop)


def _transform_dataframe(
rows: list[dict[str, Any]],
) -> list[types.EvaluationDataset]:
@@ -2077,13 +2093,16 @@ def _transform_dataframe(
col for col in df.columns if col not in _evals_constant.COMMON_DATASET_COLUMNS
]

-    eval_dfs = [
-        types.EvaluationDataset(
-            candidate_name=candidate,
-            eval_dataset_df=df.rename(columns={candidate: _evals_constant.RESPONSE}),
-        )
-        for candidate in candidates
-    ]
+    eval_dfs = []
+    for candidate in candidates:
+        temp_df = df.rename(columns={candidate: _evals_constant.RESPONSE})
+        temp_df = _drop_empty_columns(temp_df)
+        eval_dfs.append(
+            types.EvaluationDataset(
+                candidate_name=candidate,
+                eval_dataset_df=temp_df,
+            )
+        )
return eval_dfs


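The net effect on _transform_dataframe is easiest to see in isolation. The sketch below replays the new per-candidate flow with plain pandas: rename the candidate column to response, then drop all-empty columns. The column names follow the tests above, and is_empty restates the helper's rule rather than importing the private function; this is an illustration, not the library code path.

# Pandas-only replay of the per-candidate flow _transform_dataframe now uses.
from typing import Any

import pandas as pd


def is_empty(value: Any) -> bool:
    # Same rule as the _drop_empty_columns helper in this PR.
    if isinstance(value, (list, dict)):
        return not value
    return bool(pd.isna(value))


rows = [
    {
        "prompt": "test prompt",
        "reference": None,          # all-empty across rows: dropped
        "intermediate_events": [],  # all-empty across rows: dropped
        "candidate1": "test response",
    }
]
df = pd.DataFrame(rows)

# Per candidate: rename to the common "response" column, then prune.
per_candidate = df.rename(columns={"candidate1": "response"})
empty_cols = [
    c for c in per_candidate.columns if per_candidate[c].apply(is_empty).all()
]
per_candidate = per_candidate.drop(columns=empty_cols)

print(list(per_candidate.columns))  # ['prompt', 'response']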