From 641e2ed9ba0783153e00dd9fc864bf044466d1fa Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer <vertex-sdk-bot@google.com>
Date: Sat, 4 Apr 2026 23:41:52 -0700
Subject: [PATCH] feat: GenAI Client(evals) - add rich HTML visualization for
 loss pattern analysis

PiperOrigin-RevId: 894799725
---
 tests/unit/vertexai/genai/test_evals.py |  97 +++++-
 vertexai/_genai/_evals_visualization.py | 378 +++++++++++++++++++++---
 vertexai/_genai/types/common.py         |  61 ++--
 3 files changed, 449 insertions(+), 87 deletions(-)

diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index a2844e36cd..4e2e696941 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -520,7 +520,79 @@ def test_response_structure(self):
         assert result.clusters[0].item_count == 3
         assert result.clusters[1].cluster_id == "cluster-2"
 
-    def test_response_show_with_results(self, capsys):
+    def test_get_loss_analysis_html(self):
+        """Tests that _get_loss_analysis_html generates valid HTML with data."""
+        from vertexai._genai import _evals_visualization
+        import json
+
+        data = {
+            "results": [
+                {
+                    "config": {
+                        "metric": "test_metric",
+                        "candidate": "test-candidate",
+                    },
+                    "clusters": [
+                        {
+                            "cluster_id": "c1",
+                            "taxonomy_entry": {
+                                "l1_category": "Tool Calling",
+                                "l2_category": "Missing Invocation",
+                                "description": "Agent failed to call the tool.",
+                            },
+                            "item_count": 5,
+                            "examples": [
+                                {
+                                    "evaluation_result": {
+                                        "request": {
+                                            "prompt": {
+                                                "agent_data": {
+                                                    "turns": [
+                                                        {
+                                                            "turn_index": 0,
+                                                            "events": [
+                                                                {
+                                                                    "author": "user",
+                                                                    "content": {
+                                                                        "parts": [
+                                                                            {
+                                                                                "text": "Find flights to Paris"
+                                                                            }
+                                                                        ],
+                                                                    },
+                                                                }
+                                                            ],
+                                                        }
+                                                    ],
+                                                },
+                                            },
+                                        },
+                                    },
+                                    "failed_rubrics": [
+                                        {
+                                            "rubric_id": "tool_use",
+                                            "classification_rationale": "Did not invoke find_flights.",
+                                        }
+                                    ],
+                                }
+                            ],
+                        },
+                    ],
+                }
+            ]
+        }
+        html = _evals_visualization._get_loss_analysis_html(json.dumps(data))
+        assert "Loss Pattern Analysis" in html
+        assert "test_metric" not in html  # data is Base64-encoded in the HTML
+        assert "<!DOCTYPE html>" in html
+        assert "extractScenarioPreview" in html
+        assert "example-scenario" in html
+
+    def test_display_loss_clusters_response_no_ipython(self):
+        """Tests graceful fallback when not in IPython."""
+        from vertexai._genai import _evals_visualization
+        from unittest import mock
+
         response = common_types.GenerateLossClustersResponse(
             results=[
                 common_types.LossAnalysisResult(
@@ -541,12 +613,17 @@ def test_response_show_with_results(self, capsys):
                 )
             ],
         )
-        response.show()
-        captured = capsys.readouterr()
-        assert "test_metric" in captured.out
-        assert "c1" in captured.out
+        with mock.patch.object(
+            _evals_visualization, "_is_ipython_env", return_value=False
+        ):
+            # Should not raise, just log a warning
+            response.show()
+
+    def test_display_loss_analysis_result_no_ipython(self):
+        """Tests graceful fallback for individual result when not in IPython."""
+        from vertexai._genai import _evals_visualization
+        from unittest import mock
 
-    def test_loss_analysis_result_show(self, capsys):
         result = common_types.LossAnalysisResult(
             config=common_types.LossAnalysisConfig(
                 metric="test_metric",
@@ -563,10 +640,10 @@ def test_loss_analysis_result_show(self, capsys):
                 ),
             ],
         )
-        result.show()
-        captured = capsys.readouterr()
-        assert "test_metric" in captured.out
-        assert "c1" in captured.out
+        with mock.patch.object(
+            _evals_visualization, "_is_ipython_env", return_value=False
+        ):
+            result.show()
 
 
 def _make_eval_result(
diff --git a/vertexai/_genai/_evals_visualization.py b/vertexai/_genai/_evals_visualization.py
index d9319f7406..ed151a199f 100644
--- a/vertexai/_genai/_evals_visualization.py
+++ b/vertexai/_genai/_evals_visualization.py
@@ -1439,56 +1439,338 @@ def display_evaluation_result(
 
 
 def display_evaluation_dataset(eval_dataset_obj: types.EvaluationDataset) -> None:
-    """Displays an evaluation dataset in an IPython environment."""
-    if not _is_ipython_env():
-        logger.warning("Skipping display: not in an IPython environment.")
-        return
-    else:
-        from IPython import display
+  """Displays an evaluation dataset in an IPython environment."""
+  if not _is_ipython_env():
+    logger.warning("Skipping display: not in an IPython environment.")
+    return
+  else:
+    from IPython import display
+
+  if (
+      eval_dataset_obj.eval_dataset_df is None
+      or eval_dataset_obj.eval_dataset_df.empty
+  ):
+    logger.warning("No inference data to display.")
+    return
+
+  processed_rows = []
+  df = eval_dataset_obj.eval_dataset_df
+
+  for _, row in df.iterrows():
+    processed_row = {}
+    for col_name, cell_value in row.items():
+      if col_name in ["prompt", "request", "response"]:
+        processed_row[col_name] = _extract_text_and_raw_json(cell_value)
+      elif col_name == "rubric_groups":
+        # Special handling for rubric_groups to keep it as a dict
+        if isinstance(cell_value, dict):
+          processed_row[col_name] = {
+              k: [  # type: ignore[misc]
+                  (
+                      v_item.model_dump(mode="json")
+                      if hasattr(v_item, "model_dump")
+                      else v_item
+                  )
+                  for v_item in v
+              ]
+              for k, v in cell_value.items()
+          }
+        else:
+          processed_row[col_name] = cell_value
+      else:
+        if isinstance(cell_value, (dict, list)):
+          processed_row[col_name] = json.dumps(  # type: ignore[assignment]
+              cell_value, ensure_ascii=False, default=_pydantic_serializer
+          )
+        else:
+          processed_row[col_name] = cell_value
+    processed_rows.append(processed_row)
 
-    if (
-        eval_dataset_obj.eval_dataset_df is None
-        or eval_dataset_obj.eval_dataset_df.empty
-    ):
-        logger.warning("No inference data to display.")
-        return
+  dataframe_json_string = json.dumps(
+      processed_rows, ensure_ascii=False, default=str
+  )
+  html_content = _get_inference_html(dataframe_json_string)
+  display.display(display.HTML(html_content))
 
-    processed_rows = []
-    df = eval_dataset_obj.eval_dataset_df
-
-    for _, row in df.iterrows():
-        processed_row = {}
-        for col_name, cell_value in row.items():
-            if col_name in ["prompt", "request", "response"]:
-                processed_row[col_name] = _extract_text_and_raw_json(cell_value)
-            elif col_name == "rubric_groups":
-                # Special handling for rubric_groups to keep it as a dict
-                if isinstance(cell_value, dict):
-                    processed_row[col_name] = {
-                        k: [  # type: ignore[misc]
-                            (
-                                v_item.model_dump(mode="json")
-                                if hasattr(v_item, "model_dump")
-                                else v_item
-                            )
-                            for v_item in v
-                        ]
-                        for k, v in cell_value.items()
-                    }
-                else:
-                    processed_row[col_name] = cell_value
-            else:
-                if isinstance(cell_value, (dict, list)):
-                    processed_row[col_name] = json.dumps(  # type: ignore[assignment]
-                        cell_value, ensure_ascii=False, default=_pydantic_serializer
-                    )
-                else:
-                    processed_row[col_name] = cell_value
-        processed_rows.append(processed_row)
-
-    dataframe_json_string = json.dumps(processed_rows, ensure_ascii=False, default=str)
-    html_content = _get_inference_html(dataframe_json_string)
-    display.display(display.HTML(html_content))
+
+def _get_loss_analysis_html(loss_analysis_json: str) -> str:
+  """Returns self-contained HTML for loss pattern analysis visualization."""
+  payload_b64 = _encode_to_base64(loss_analysis_json)
+  return textwrap.dedent(f"""
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <title>Loss Pattern Analysis</title>
+    <style>
+        body {{ font-family: 'Roboto', 'Helvetica', sans-serif; margin: 2em; background-color: #f8f9fa; color: #202124; }}
+        .container {{ max-width: 1200px; margin: 20px auto; padding: 20px; background-color: white; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.12); }}
+        h1, h2, h3 {{ color: #3c4043; }}
+        h1 {{ border-bottom: 2px solid #4285F4; padding-bottom: 8px; }}
+        h2 {{ border-bottom: 1px solid #dadce0; padding-bottom: 8px; }}
+        table {{ border-collapse: collapse; width: 100%; margin: 1em 0; }}
+        th, td {{ border: 1px solid #dadce0; padding: 12px; text-align: left; vertical-align: top; }}
+        th {{ background-color: #f2f2f2; font-weight: 500; }}
+        details {{ border: 1px solid #dadce0; border-radius: 8px; padding: 16px; margin-bottom: 16px; background: #fff; }}
+        summary {{ font-weight: 500; font-size: 1.1em; cursor: pointer; }}
+
+        .metric-header {{ display: flex; align-items: baseline; gap: 12px; margin-bottom: 4px; }}
+        .metric-label {{ color: #1a73e8; font-weight: 600; font-size: 1.1em; }}
+        .candidate-label {{ color: #5f6368; font-size: 0.95em; }}
+        .item-count {{ font-weight: bold; font-size: 16px; color: #1a73e8; }}
+
+        .cluster-card {{ background: #fff; border: 1px solid #dadce0; border-radius: 8px; margin-bottom: 12px; box-shadow: 0 1px 2px rgba(0,0,0,0.04); overflow: hidden; }}
+        .cluster-summary {{ list-style: none; cursor: pointer; padding: 12px 16px; display: flex; align-items: center; justify-content: space-between; background: #f8f9fa; margin: 0; outline: none; }}
+        .cluster-summary::-webkit-details-marker {{ display: none; }}
+        .cluster-card[open] .cluster-summary {{ border-bottom: 1px solid #dadce0; }}
+
+        .cluster-name {{ display: flex; align-items: center; font-weight: 600; color: #3c4043; font-size: 14px; }}
+        .cluster-name::before {{ content: '►'; font-size: 0.8em; margin-right: 8px; transition: transform 0.2s; color: #5f6368; }}
+        .cluster-card[open] .cluster-name::before {{ transform: rotate(90deg); }}
+
+        .l1-pill {{ display: inline-block; background-color: #e8f0fe; color: #1967d2; border-radius: 16px; padding: 2px 10px; font-size: 0.85em; font-weight: 500; margin-right: 6px; }}
+        .cluster-body {{ padding: 16px; }}
+        .cluster-description {{ color: #5f6368; font-style: italic; font-size: 0.95em; margin-bottom: 12px; line-height: 1.5; }}
+
+        .example-card {{ background: #f8f9fa; border: 1px solid #eee; border-radius: 6px; padding: 12px; margin-bottom: 8px; }}
+        .example-scenario {{ background: #e8f0fe; border-radius: 6px; padding: 8px 12px; margin: 6px 0; font-size: 0.9em; color: #1967d2; display: flex; align-items: flex-start; gap: 6px; }}
+        .example-scenario-icon {{ flex-shrink: 0; margin-top: 1px; }}
+        .example-scenario-text {{ word-break: break-word; }}
+        .example-rubric {{ display: inline-block; background-color: #fce8e6; color: #c5221f; border-radius: 12px; padding: 2px 10px; font-size: 0.85em; font-weight: 500; margin-right: 6px; margin-bottom: 4px; }}
+        .example-rationale {{ color: #5f6368; font-size: 0.9em; line-height: 1.5; margin-top: 6px; background: #f1f1f1; padding: 8px 12px; border-radius: 4px; white-space: pre-wrap; word-wrap: break-word; }}
+        .examples-details {{ border: none; padding: 0; margin-top: 8px; }}
+        .examples-details > summary {{ font-size: 0.95em; color: #1a73e8; cursor: pointer; }}
+        .no-data {{ color: #5f6368; font-style: italic; padding: 16px; text-align: center; }}
+    </style>
+</head>
+<body>
+<div class="container">
+<div id="loss-analysis-root"></div>
+</div>
+<script>
+(function() {{
+    const raw = atob("{payload_b64}");
+    const data = JSON.parse(raw);
+
+    const root = document.getElementById('loss-analysis-root');
+    const results = data.results || [];
+
+    if (results.length === 0) {{
+        root.innerHTML = '<h1>Loss Pattern Analysis</h1><p class="no-data">No loss analysis results found.</p>';
+        return;
+    }}
+
+    let html = '<h1>Loss Pattern Analysis</h1>';
+
+    // Summary table
+    if (results.length > 0) {{
+        html += '<h2>Summary</h2><table><thead><tr><th>Metric</th><th>Candidate</th><th>Clusters</th><th>Total Failures</th></tr></thead><tbody>';
+        results.forEach(r => {{
+            const cfg = r.config || {{}};
+            const clusters = r.clusters || [];
+            const totalItems = clusters.reduce((sum, c) => sum + (c.item_count || 0), 0);
+            html += '<tr>';
+            html += '<td>' + escapeHtml(cfg.metric || 'N/A') + '</td>';
+            html += '<td>' + escapeHtml(cfg.candidate || 'N/A') + '</td>';
+            html += '<td>' + clusters.length + '</td>';
+            html += '<td class="item-count">' + totalItems + '</td>';
+            html += '</tr>';
+        }});
+        html += '</tbody></table>';
+    }}
+
+    // Per-result detail sections
+    results.forEach((r, ri) => {{
+        const cfg = r.config || {{}};
+        const clusters = r.clusters || [];
+
+        html += '<h2>';
+        html += '<span class="metric-label">' + escapeHtml(cfg.metric || 'Unknown Metric') + '</span>';
+        if (cfg.candidate) html += ' <span class="candidate-label">/ ' + escapeHtml(cfg.candidate) + '</span>';
+        html += '</h2>';
+
+        if (clusters.length === 0) {{
+            html += '<p class="no-data">No loss clusters found for this metric.</p>';
+            return;
+        }}
+
+        // Sort clusters by item_count descending
+        clusters.sort((a, b) => (b.item_count || 0) - (a.item_count || 0));
+
+        // Overview table
+        html += '<table><thead><tr><th>#</th><th>L1 Category</th><th>L2 Category</th><th>Failures</th></tr></thead><tbody>';
+        clusters.forEach((c, ci) => {{
+            const entry = c.taxonomy_entry || {{}};
+            html += '<tr>';
+            html += '<td>' + (ci + 1) + '</td>';
+            html += '<td><span class="l1-pill">' + escapeHtml(entry.l1_category || 'N/A') + '</span></td>';
+            html += '<td>' + escapeHtml(entry.l2_category || 'N/A') + '</td>';
+            html += '<td class="item-count">' + (c.item_count || 0) + '</td>';
+            html += '</tr>';
+        }});
+        html += '</tbody></table>';
+
+        // Cluster detail cards
+        clusters.forEach((c, ci) => {{
+            const entry = c.taxonomy_entry || {{}};
+            const examples = c.examples || [];
+            const isFirst = ci === 0;
+
+            html += '<details class="cluster-card"' + (isFirst ? ' open' : '') + '>';
+            html += '<summary class="cluster-summary">';
+            html += '<div class="cluster-name">';
+            html += '<span class="l1-pill">' + escapeHtml(entry.l1_category || '') + '</span> ';
+            html += escapeHtml(entry.l2_category || 'Cluster ' + (ci + 1));
+            html += '</div>';
+            html += '<span class="item-count">' + (c.item_count || 0) + ' failures</span>';
+            html += '</summary>';
+
+            html += '<div class="cluster-body">';
+
+            if (entry.description) {{
+                html += '<div class="cluster-description">' + escapeHtml(entry.description) + '</div>';
+            }}
+
+            // Examples section
+            if (examples.length > 0) {{
+                html += '<details class="examples-details">';
+                html += '<summary>Examples (' + examples.length + ')</summary>';
+                html += '<div style="margin-top: 8px;">';
+                examples.forEach((ex, ei) => {{
+                    html += '<div class="example-card">';
+                    html += '<strong>Example ' + (ei + 1) + '</strong>';
+                    const scenario = extractScenarioPreview(ex);
+                    if (scenario) {{
+                        html += '<div class="example-scenario">';
+                        html += '<span class="example-scenario-icon">';
+                        html += '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path></svg>';
+                        html += '</span>';
+                        html += '<span class="example-scenario-text">' + escapeHtml(scenario) + '</span>';
+                        html += '</div>';
+                    }}
+                    const rubrics = ex.failed_rubrics || [];
+                    if (rubrics.length > 0) {{
+                        html += '<div style="margin-top: 6px;">';
+                        rubrics.forEach(fr => {{
+                            if (fr.rubric_id) {{
+                                html += '<span class="example-rubric">' + escapeHtml(fr.rubric_id) + '</span>';
+                            }}
+                            if (fr.classification_rationale) {{
+                                html += '<div class="example-rationale">' + escapeHtml(fr.classification_rationale) + '</div>';
+                            }}
+                        }});
+                        html += '</div>';
+                    }}
+                    html += '</div>';
+                }});
+                html += '</div></details>';
+            }}
+
+            html += '</div></details>';
+        }});
+    }});
+
+    root.innerHTML = html;
+
+    function escapeHtml(str) {{
+        if (str === null || str === undefined) return '';
+        return String(str).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
+    }}
+
+    function extractScenarioPreview(ex) {{
+        // Extract the first user message from evaluation_result as a scenario preview.
+        const er = ex.evaluation_result;
+        if (!er) return null;
+        // Try agent_data path: request.prompt.agent_data.turns[].events[]
+        const agentData = (er.request && er.request.prompt && er.request.prompt.agent_data) ? er.request.prompt.agent_data : null;
+        if (agentData && agentData.turns) {{
+            for (const turn of agentData.turns) {{
+                if (!turn.events) continue;
+                for (const event of turn.events) {{
+                    const role = event.author || (event.content && event.content.role) || '';
+                    if (role.toLowerCase() === 'user' && event.content && event.content.parts) {{
+                        for (const part of event.content.parts) {{
+                            if (part.text) {{
+                                const text = part.text.trim();
+                                return text.length > 150 ? text.substring(0, 150) + '...' : text;
+                            }}
+                        }}
+                    }}
+                }}
+            }}
+        }}
+        // Try simple prompt path: request.prompt.parts[].text
+        const prompt = er.request && er.request.prompt;
+        if (prompt && prompt.parts) {{
+            for (const part of prompt.parts) {{
+                if (part.text) {{
+                    const text = part.text.trim();
+                    return text.length > 150 ? text.substring(0, 150) + '...' : text;
+                }}
+            }}
+        }}
+        return null;
+    }}
+}})();
+</script>
+</body>
+</html>
+""")
+
+
+def display_loss_clusters_response(
+    response_obj: "types.GenerateLossClustersResponse",
+) -> None:
+  """Displays a GenerateLossClustersResponse in an IPython environment."""
+  if not _is_ipython_env():
+    logger.warning("Skipping display: not in an IPython environment.")
+    return
+  else:
+    from IPython import display
+
+  try:
+    result_dump = response_obj.model_dump(mode="json", exclude_none=True)
+  except Exception as e:
+    logger.error(
+        "Failed to serialize GenerateLossClustersResponse: %s",
+        e,
+        exc_info=True,
+    )
+    raise
+
+  html_content = _get_loss_analysis_html(
+      json.dumps(result_dump, ensure_ascii=False, default=_pydantic_serializer)
+  )
+  display.display(display.HTML(html_content))
+
+
+def display_loss_analysis_result(
+    result_obj: "types.LossAnalysisResult",
+) -> None:
+  """Displays a single LossAnalysisResult in an IPython environment."""
+  if not _is_ipython_env():
+    logger.warning("Skipping display: not in an IPython environment.")
+    return
+  else:
+    from IPython import display
+
+  try:
+    # Wrap in a response-like structure for the shared HTML generator
+    wrapped = {
+        "results": [result_obj.model_dump(mode="json", exclude_none=True)]
+    }
+  except Exception as e:
+    logger.error(
+        "Failed to serialize LossAnalysisResult: %s",
+        e,
+        exc_info=True,
+    )
+    raise
+
+  html_content = _get_loss_analysis_html(
+      json.dumps(wrapped, ensure_ascii=False, default=_pydantic_serializer)
+  )
+  display.display(display.HTML(html_content))
 
 
 def _get_status_html(status: str, error_message: Optional[str] = None) -> str:
diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py
index cc115e03cd..e0bfb329eb 100644
--- a/vertexai/_genai/types/common.py
+++ b/vertexai/_genai/types/common.py
@@ -4967,24 +4967,25 @@ class LossClusterDict(TypedDict, total=False):
 
 
 class LossAnalysisResult(_common.BaseModel):
-    """The top-level result for loss analysis."""
+  """The top-level result for loss analysis."""
 
-    config: Optional[LossAnalysisConfig] = Field(
-        default=None,
-        description="""The configuration used to generate this analysis.""",
-    )
-    analysis_time: Optional[str] = Field(
-        default=None, description="""The timestamp when this analysis was performed."""
-    )
-    clusters: Optional[list[LossCluster]] = Field(
-        default=None, description="""The list of identified loss clusters."""
-    )
+  config: Optional[LossAnalysisConfig] = Field(
+      default=None,
+      description="""The configuration used to generate this analysis.""",
+  )
+  analysis_time: Optional[str] = Field(
+      default=None,
+      description="""The timestamp when this analysis was performed.""",
+  )
+  clusters: Optional[list[LossCluster]] = Field(
+      default=None, description="""The list of identified loss clusters."""
+  )
 
-    def show(self) -> None:
-        """Shows the loss analysis result as a formatted pandas DataFrame."""
-        from .. import _evals_utils
+  def show(self) -> None:
+    """Shows the loss analysis result with rich HTML visualization."""
+    from .. import _evals_visualization
 
-        _evals_utils._display_loss_analysis_result(self)
+    _evals_visualization.display_loss_analysis_result(self)
 
 
 class LossAnalysisResultDict(TypedDict, total=False):
@@ -5004,20 +5005,22 @@ class LossAnalysisResultDict(TypedDict, total=False):
 
 
 class GenerateLossClustersResponse(_common.BaseModel):
-    """Response message for EvaluationAnalyticsService.GenerateLossClusters."""
-
-    analysis_time: Optional[str] = Field(
-        default=None, description="""The timestamp when this analysis was completed."""
-    )
-    results: Optional[list[LossAnalysisResult]] = Field(
-        default=None,
-        description="""The analysis results, one per config provided in the request.""",
-    )
-
-    def show(self) -> None:
-        """Shows all loss analysis results."""
-        for result in self.results or []:
-            result.show()
+  """Response message for EvaluationAnalyticsService.GenerateLossClusters."""
+
+  analysis_time: Optional[str] = Field(
+      default=None,
+      description="""The timestamp when this analysis was completed.""",
+  )
+  results: Optional[list[LossAnalysisResult]] = Field(
+      default=None,
+      description="""The analysis results, one per config provided in the request.""",
+  )
+
+  def show(self) -> None:
+    """Shows the loss pattern analysis report with rich HTML visualization."""
+    from .. import _evals_visualization
+
+    _evals_visualization.display_loss_clusters_response(self)
 
 
 class GenerateLossClustersResponseDict(TypedDict, total=False):