From 641e2ed9ba0783153e00dd9fc864bf044466d1fa Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Sat, 4 Apr 2026 23:41:52 -0700 Subject: [PATCH] feat: GenAI Client(evals) - add rich HTML visualization for loss pattern analysis PiperOrigin-RevId: 894799725 --- tests/unit/vertexai/genai/test_evals.py | 97 +++++- vertexai/_genai/_evals_visualization.py | 378 +++++++++++++++++++++--- vertexai/_genai/types/common.py | 61 ++-- 3 files changed, 449 insertions(+), 87 deletions(-) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index a2844e36cd..4e2e696941 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -520,7 +520,79 @@ def test_response_structure(self): assert result.clusters[0].item_count == 3 assert result.clusters[1].cluster_id == "cluster-2" - def test_response_show_with_results(self, capsys): + def test_get_loss_analysis_html(self): + """Tests that _get_loss_analysis_html generates valid HTML with data.""" + from vertexai._genai import _evals_visualization + import json + + data = { + "results": [ + { + "config": { + "metric": "test_metric", + "candidate": "test-candidate", + }, + "clusters": [ + { + "cluster_id": "c1", + "taxonomy_entry": { + "l1_category": "Tool Calling", + "l2_category": "Missing Invocation", + "description": "Agent failed to call the tool.", + }, + "item_count": 5, + "examples": [ + { + "evaluation_result": { + "request": { + "prompt": { + "agent_data": { + "turns": [ + { + "turn_index": 0, + "events": [ + { + "author": "user", + "content": { + "parts": [ + { + "text": "Find flights to Paris" + } + ], + }, + } + ], + } + ], + }, + }, + }, + }, + "failed_rubrics": [ + { + "rubric_id": "tool_use", + "classification_rationale": "Did not invoke find_flights.", + } + ], + } + ], + }, + ], + } + ] + } + html = _evals_visualization._get_loss_analysis_html(json.dumps(data)) + assert "Loss Pattern Analysis" in html + assert "test_metric" not in html # data is Base64-encoded in the HTML + assert "" in html + assert "extractScenarioPreview" in html + assert "example-scenario" in html + + def test_display_loss_clusters_response_no_ipython(self): + """Tests graceful fallback when not in IPython.""" + from vertexai._genai import _evals_visualization + from unittest import mock + response = common_types.GenerateLossClustersResponse( results=[ common_types.LossAnalysisResult( @@ -541,12 +613,17 @@ def test_response_show_with_results(self, capsys): ) ], ) - response.show() - captured = capsys.readouterr() - assert "test_metric" in captured.out - assert "c1" in captured.out + with mock.patch.object( + _evals_visualization, "_is_ipython_env", return_value=False + ): + # Should not raise, just log a warning + response.show() + + def test_display_loss_analysis_result_no_ipython(self): + """Tests graceful fallback for individual result when not in IPython.""" + from vertexai._genai import _evals_visualization + from unittest import mock - def test_loss_analysis_result_show(self, capsys): result = common_types.LossAnalysisResult( config=common_types.LossAnalysisConfig( metric="test_metric", @@ -563,10 +640,10 @@ def test_loss_analysis_result_show(self, capsys): ), ], ) - result.show() - captured = capsys.readouterr() - assert "test_metric" in captured.out - assert "c1" in captured.out + with mock.patch.object( + _evals_visualization, "_is_ipython_env", return_value=False + ): + result.show() def _make_eval_result( diff --git a/vertexai/_genai/_evals_visualization.py b/vertexai/_genai/_evals_visualization.py index d9319f7406..ed151a199f 100644 --- a/vertexai/_genai/_evals_visualization.py +++ b/vertexai/_genai/_evals_visualization.py @@ -1439,56 +1439,338 @@ def display_evaluation_result( def display_evaluation_dataset(eval_dataset_obj: types.EvaluationDataset) -> None: - """Displays an evaluation dataset in an IPython environment.""" - if not _is_ipython_env(): - logger.warning("Skipping display: not in an IPython environment.") - return - else: - from IPython import display + """Displays an evaluation dataset in an IPython environment.""" + if not _is_ipython_env(): + logger.warning("Skipping display: not in an IPython environment.") + return + else: + from IPython import display + + if ( + eval_dataset_obj.eval_dataset_df is None + or eval_dataset_obj.eval_dataset_df.empty + ): + logger.warning("No inference data to display.") + return + + processed_rows = [] + df = eval_dataset_obj.eval_dataset_df + + for _, row in df.iterrows(): + processed_row = {} + for col_name, cell_value in row.items(): + if col_name in ["prompt", "request", "response"]: + processed_row[col_name] = _extract_text_and_raw_json(cell_value) + elif col_name == "rubric_groups": + # Special handling for rubric_groups to keep it as a dict + if isinstance(cell_value, dict): + processed_row[col_name] = { + k: [ # type: ignore[misc] + ( + v_item.model_dump(mode="json") + if hasattr(v_item, "model_dump") + else v_item + ) + for v_item in v + ] + for k, v in cell_value.items() + } + else: + processed_row[col_name] = cell_value + else: + if isinstance(cell_value, (dict, list)): + processed_row[col_name] = json.dumps( # type: ignore[assignment] + cell_value, ensure_ascii=False, default=_pydantic_serializer + ) + else: + processed_row[col_name] = cell_value + processed_rows.append(processed_row) - if ( - eval_dataset_obj.eval_dataset_df is None - or eval_dataset_obj.eval_dataset_df.empty - ): - logger.warning("No inference data to display.") - return + dataframe_json_string = json.dumps( + processed_rows, ensure_ascii=False, default=str + ) + html_content = _get_inference_html(dataframe_json_string) + display.display(display.HTML(html_content)) - processed_rows = [] - df = eval_dataset_obj.eval_dataset_df - - for _, row in df.iterrows(): - processed_row = {} - for col_name, cell_value in row.items(): - if col_name in ["prompt", "request", "response"]: - processed_row[col_name] = _extract_text_and_raw_json(cell_value) - elif col_name == "rubric_groups": - # Special handling for rubric_groups to keep it as a dict - if isinstance(cell_value, dict): - processed_row[col_name] = { - k: [ # type: ignore[misc] - ( - v_item.model_dump(mode="json") - if hasattr(v_item, "model_dump") - else v_item - ) - for v_item in v - ] - for k, v in cell_value.items() - } - else: - processed_row[col_name] = cell_value - else: - if isinstance(cell_value, (dict, list)): - processed_row[col_name] = json.dumps( # type: ignore[assignment] - cell_value, ensure_ascii=False, default=_pydantic_serializer - ) - else: - processed_row[col_name] = cell_value - processed_rows.append(processed_row) - - dataframe_json_string = json.dumps(processed_rows, ensure_ascii=False, default=str) - html_content = _get_inference_html(dataframe_json_string) - display.display(display.HTML(html_content)) + +def _get_loss_analysis_html(loss_analysis_json: str) -> str: + """Returns self-contained HTML for loss pattern analysis visualization.""" + payload_b64 = _encode_to_base64(loss_analysis_json) + return textwrap.dedent(f""" + + + + + Loss Pattern Analysis + + + +
+
+
+ + + +""") + + +def display_loss_clusters_response( + response_obj: "types.GenerateLossClustersResponse", +) -> None: + """Displays a GenerateLossClustersResponse in an IPython environment.""" + if not _is_ipython_env(): + logger.warning("Skipping display: not in an IPython environment.") + return + else: + from IPython import display + + try: + result_dump = response_obj.model_dump(mode="json", exclude_none=True) + except Exception as e: + logger.error( + "Failed to serialize GenerateLossClustersResponse: %s", + e, + exc_info=True, + ) + raise + + html_content = _get_loss_analysis_html( + json.dumps(result_dump, ensure_ascii=False, default=_pydantic_serializer) + ) + display.display(display.HTML(html_content)) + + +def display_loss_analysis_result( + result_obj: "types.LossAnalysisResult", +) -> None: + """Displays a single LossAnalysisResult in an IPython environment.""" + if not _is_ipython_env(): + logger.warning("Skipping display: not in an IPython environment.") + return + else: + from IPython import display + + try: + # Wrap in a response-like structure for the shared HTML generator + wrapped = { + "results": [result_obj.model_dump(mode="json", exclude_none=True)] + } + except Exception as e: + logger.error( + "Failed to serialize LossAnalysisResult: %s", + e, + exc_info=True, + ) + raise + + html_content = _get_loss_analysis_html( + json.dumps(wrapped, ensure_ascii=False, default=_pydantic_serializer) + ) + display.display(display.HTML(html_content)) def _get_status_html(status: str, error_message: Optional[str] = None) -> str: diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index cc115e03cd..e0bfb329eb 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -4967,24 +4967,25 @@ class LossClusterDict(TypedDict, total=False): class LossAnalysisResult(_common.BaseModel): - """The top-level result for loss analysis.""" + """The top-level result for loss analysis.""" - config: Optional[LossAnalysisConfig] = Field( - default=None, - description="""The configuration used to generate this analysis.""", - ) - analysis_time: Optional[str] = Field( - default=None, description="""The timestamp when this analysis was performed.""" - ) - clusters: Optional[list[LossCluster]] = Field( - default=None, description="""The list of identified loss clusters.""" - ) + config: Optional[LossAnalysisConfig] = Field( + default=None, + description="""The configuration used to generate this analysis.""", + ) + analysis_time: Optional[str] = Field( + default=None, + description="""The timestamp when this analysis was performed.""", + ) + clusters: Optional[list[LossCluster]] = Field( + default=None, description="""The list of identified loss clusters.""" + ) - def show(self) -> None: - """Shows the loss analysis result as a formatted pandas DataFrame.""" - from .. import _evals_utils + def show(self) -> None: + """Shows the loss analysis result with rich HTML visualization.""" + from .. import _evals_visualization - _evals_utils._display_loss_analysis_result(self) + _evals_visualization.display_loss_analysis_result(self) class LossAnalysisResultDict(TypedDict, total=False): @@ -5004,20 +5005,22 @@ class LossAnalysisResultDict(TypedDict, total=False): class GenerateLossClustersResponse(_common.BaseModel): - """Response message for EvaluationAnalyticsService.GenerateLossClusters.""" - - analysis_time: Optional[str] = Field( - default=None, description="""The timestamp when this analysis was completed.""" - ) - results: Optional[list[LossAnalysisResult]] = Field( - default=None, - description="""The analysis results, one per config provided in the request.""", - ) - - def show(self) -> None: - """Shows all loss analysis results.""" - for result in self.results or []: - result.show() + """Response message for EvaluationAnalyticsService.GenerateLossClusters.""" + + analysis_time: Optional[str] = Field( + default=None, + description="""The timestamp when this analysis was completed.""", + ) + results: Optional[list[LossAnalysisResult]] = Field( + default=None, + description="""The analysis results, one per config provided in the request.""", + ) + + def show(self) -> None: + """Shows the loss pattern analysis report with rich HTML visualization.""" + from .. import _evals_visualization + + _evals_visualization.display_loss_clusters_response(self) class GenerateLossClustersResponseDict(TypedDict, total=False):