Skip to content

Commit 19aff73

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add support for referencing registered metrics by resource name in evaluation run API
PiperOrigin-RevId: 878604099
1 parent 1ecaa9b commit 19aff73

File tree

5 files changed

+67
-1
lines changed

5 files changed

+67
-1
lines changed

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,28 @@ def test_create_eval_run_with_inference_configs(client):
238238
assert evaluation_run.error is None
239239

240240

241+
def test_create_eval_run_with_metric_resource_name(client):
242+
"""Tests create_evaluation_run with metric_resource_name."""
243+
client._api_client._http_options.api_version = "v1beta1"
244+
client._api_client._http_options.base_url = (
245+
"https://us-central1-autopush-aiplatform.sandbox.googleapis.com/"
246+
)
247+
metric_resource_name = "projects/977012026409/locations/us-central1/evaluationMetrics/6048334299558576128"
248+
metric = types.EvaluationRunMetric(
249+
metric="my_custom_metric",
250+
metric_resource_name=metric_resource_name,
251+
)
252+
evaluation_run = client.evals.create_evaluation_run(
253+
dataset=types.EvaluationDataset(
254+
eval_dataset_df=INPUT_DF_WITH_CONTEXT_AND_HISTORY
255+
),
256+
metrics=[metric],
257+
dest=GCS_DEST,
258+
)
259+
assert isinstance(evaluation_run, types.EvaluationRun)
260+
assert evaluation_run.evaluation_config.metrics[0].metric == "my_custom_metric"
261+
262+
241263
# Dataframe tests fail in replay mode because of UUID generation mismatch.
242264
# def test_create_eval_run_data_source_evaluation_dataset(client):
243265
# """Tests that create_evaluation_run() creates a correctly structured

vertexai/_genai/_evals_common.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from . import _gcs_utils
4646
from . import evals
4747
from . import types
48+
from . import _transformers as t
4849

4950
logger = logging.getLogger(__name__)
5051

@@ -1328,7 +1329,7 @@ def _resolve_dataset_inputs(
13281329

13291330

13301331
def _resolve_evaluation_run_metrics(
1331-
metrics: list[types.EvaluationRunMetric], api_client: Any
1332+
metrics: Union[list[types.EvaluationRunMetric], list[types.Metric]], api_client: Any
13321333
) -> list[types.EvaluationRunMetric]:
13331334
"""Resolves a list of evaluation run metric instances, loading RubricMetric if necessary."""
13341335
if not metrics:
@@ -1361,6 +1362,16 @@ def _resolve_evaluation_run_metrics(
13611362
e,
13621363
)
13631364
raise
1365+
elif isinstance(metric_instance, types.Metric):
1366+
config_dict = t.t_metrics([metric_instance])[0]
1367+
res_name = config_dict.pop("metric_resource_name", None)
1368+
resolved_metrics_list.append(
1369+
types.EvaluationRunMetric(
1370+
metric=metric_instance.name,
1371+
metric_config=config_dict if config_dict else None,
1372+
metric_resource_name=res_name,
1373+
)
1374+
)
13641375
else:
13651376
try:
13661377
metric_name_str = str(metric_instance)

vertexai/_genai/_transformers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ def t_metrics(
3838

3939
for metric in metrics:
4040
metric_payload_item: dict[str, Any] = {}
41+
if hasattr(metric, "metric_resource_name") and metric.metric_resource_name:
42+
metric_payload_item["metric_resource_name"] = metric.metric_resource_name
4143

4244
metric_name = getv(metric, ["name"]).lower()
4345

@@ -79,6 +81,9 @@ def t_metrics(
7981
"return_raw_output": return_raw_output
8082
}
8183
metric_payload_item["pointwise_metric_spec"] = pointwise_spec
84+
elif "metric_resource_name" in metric_payload_item:
85+
# Valid case: Metric is identified by resource name; no inline spec required.
86+
pass
8287
else:
8388
raise ValueError(
8489
f"Unsupported metric type or invalid metric name: {metric_name}"

vertexai/_genai/evals.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,13 @@ def _EvaluationRunMetric_from_vertex(
392392
if getv(from_object, ["metric"]) is not None:
393393
setv(to_object, ["metric"], getv(from_object, ["metric"]))
394394

395+
if getv(from_object, ["metricResourceName"]) is not None:
396+
setv(
397+
to_object,
398+
["metric_resource_name"],
399+
getv(from_object, ["metricResourceName"]),
400+
)
401+
395402
if getv(from_object, ["metricConfig"]) is not None:
396403
setv(
397404
to_object,
@@ -410,6 +417,13 @@ def _EvaluationRunMetric_to_vertex(
410417
if getv(from_object, ["metric"]) is not None:
411418
setv(to_object, ["metric"], getv(from_object, ["metric"]))
412419

420+
if getv(from_object, ["metric_resource_name"]) is not None:
421+
setv(
422+
to_object,
423+
["metricResourceName"],
424+
getv(from_object, ["metric_resource_name"]),
425+
)
426+
413427
if getv(from_object, ["metric_config"]) is not None:
414428
setv(
415429
to_object,

vertexai/_genai/types/common.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2479,6 +2479,10 @@ class EvaluationRunMetric(_common.BaseModel):
24792479
metric: Optional[str] = Field(
24802480
default=None, description="""The name of the metric."""
24812481
)
2482+
metric_resource_name: Optional[str] = Field(
2483+
default=None,
2484+
description="""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}""",
2485+
)
24822486
metric_config: Optional[UnifiedMetric] = Field(
24832487
default=None, description="""The unified metric used for evaluation run."""
24842488
)
@@ -2490,6 +2494,9 @@ class EvaluationRunMetricDict(TypedDict, total=False):
24902494
metric: Optional[str]
24912495
"""The name of the metric."""
24922496

2497+
metric_resource_name: Optional[str]
2498+
"""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}"""
2499+
24932500
metric_config: Optional[UnifiedMetricDict]
24942501
"""The unified metric used for evaluation run."""
24952502

@@ -4439,6 +4446,10 @@ class Metric(_common.BaseModel):
44394446
default=None,
44404447
description="""Optional steering instruction parameters for the automated predefined metric.""",
44414448
)
4449+
metric_resource_name: Optional[str] = Field(
4450+
default=None,
4451+
description="""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}""",
4452+
)
44424453

44434454
# Allow extra fields to support metric-specific config fields.
44444455
model_config = ConfigDict(extra="allow")
@@ -4643,6 +4654,9 @@ class MetricDict(TypedDict, total=False):
46434654
metric_spec_parameters: Optional[dict[str, Any]]
46444655
"""Optional steering instruction parameters for the automated predefined metric."""
46454656

4657+
metric_resource_name: Optional[str]
4658+
"""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}"""
4659+
46464660

46474661
MetricOrDict = Union[Metric, MetricDict]
46484662

0 commit comments

Comments (0)