Skip to content

Commit 19aff73

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add support for referencing registered metrics by resource name in evaluation run API
PiperOrigin-RevId: 878604099
1 parent 1ecaa9b commit 19aff73

File tree

5 files changed

+67
-1
lines changed

5 files changed

+67
-1
lines changed

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,28 @@ def test_create_eval_run_with_inference_configs(client):
238238
assert evaluation_run.error is None
239239

240240

241+
def test_create_eval_run_with_metric_resource_name(client):
242+
"""Tests create_evaluation_run with metric_resource_name."""
243+
client._api_client._http_options.api_version = "v1beta1"
244+
client._api_client._http_options.base_url = (
245+
"https://us-central1-autopush-aiplatform.sandbox.googleapis.com/"
246+
)
247+
metric_resource_name = "projects/977012026409/locations/us-central1/evaluationMetrics/6048334299558576128"
248+
metric = types.EvaluationRunMetric(
249+
metric="my_custom_metric",
250+
metric_resource_name=metric_resource_name,
251+
)
252+
evaluation_run = client.evals.create_evaluation_run(
253+
dataset=types.EvaluationDataset(
254+
eval_dataset_df=INPUT_DF_WITH_CONTEXT_AND_HISTORY
255+
),
256+
metrics=[metric],
257+
dest=GCS_DEST,
258+
)
259+
assert isinstance(evaluation_run, types.EvaluationRun)
260+
assert evaluation_run.evaluation_config.metrics[0].metric == "my_custom_metric"
261+
262+
241263
# Dataframe tests fail in replay mode because of UUID generation mismatch.
242264
# def test_create_eval_run_data_source_evaluation_dataset(client):
243265
# """Tests that create_evaluation_run() creates a correctly structured

vertexai/_genai/_evals_common.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from . import _gcs_utils
4646
from . import evals
4747
from . import types
48+
from . import _transformers as t
4849

4950
logger = logging.getLogger(__name__)
5051

@@ -1328,7 +1329,7 @@ def _resolve_dataset_inputs(
13281329

13291330

13301331
def _resolve_evaluation_run_metrics(
1331-
metrics: list[types.EvaluationRunMetric], api_client: Any
1332+
metrics: Union[list[types.EvaluationRunMetric], list[types.Metric]], api_client: Any
13321333
) -> list[types.EvaluationRunMetric]:
13331334
"""Resolves a list of evaluation run metric instances, loading RubricMetric if necessary."""
13341335
if not metrics:
@@ -1361,6 +1362,16 @@ def _resolve_evaluation_run_metrics(
13611362
e,
13621363
)
13631364
raise
1365+
elif isinstance(metric_instance, types.Metric):
1366+
config_dict = t.t_metrics([metric_instance])[0]
1367+
res_name = config_dict.pop("metric_resource_name", None)
1368+
resolved_metrics_list.append(
1369+
types.EvaluationRunMetric(
1370+
metric=metric_instance.name,
1371+
metric_config=config_dict if config_dict else None,
1372+
metric_resource_name=res_name,
1373+
)
1374+
)
13641375
else:
13651376
try:
13661377
metric_name_str = str(metric_instance)

vertexai/_genai/_transformers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ def t_metrics(
3838

3939
for metric in metrics:
4040
metric_payload_item: dict[str, Any] = {}
41+
if hasattr(metric, "metric_resource_name") and metric.metric_resource_name:
42+
metric_payload_item["metric_resource_name"] = metric.metric_resource_name
4143

4244
metric_name = getv(metric, ["name"]).lower()
4345

@@ -79,6 +81,9 @@ def t_metrics(
7981
"return_raw_output": return_raw_output
8082
}
8183
metric_payload_item["pointwise_metric_spec"] = pointwise_spec
84+
elif "metric_resource_name" in metric_payload_item:
85+
# Valid case: Metric is identified by resource name; no inline spec required.
86+
pass
8287
else:
8388
raise ValueError(
8489
f"Unsupported metric type or invalid metric name: {metric_name}"

vertexai/_genai/evals.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,13 @@ def _EvaluationRunMetric_from_vertex(
392392
if getv(from_object, ["metric"]) is not None:
393393
setv(to_object, ["metric"], getv(from_object, ["metric"]))
394394

395+
if getv(from_object, ["metricResourceName"]) is not None:
396+
setv(
397+
to_object,
398+
["metric_resource_name"],
399+
getv(from_object, ["metricResourceName"]),
400+
)
401+
395402
if getv(from_object, ["metricConfig"]) is not None:
396403
setv(
397404
to_object,
@@ -410,6 +417,13 @@ def _EvaluationRunMetric_to_vertex(
410417
if getv(from_object, ["metric"]) is not None:
411418
setv(to_object, ["metric"], getv(from_object, ["metric"]))
412419

420+
if getv(from_object, ["metric_resource_name"]) is not None:
421+
setv(
422+
to_object,
423+
["metricResourceName"],
424+
getv(from_object, ["metric_resource_name"]),
425+
)
426+
413427
if getv(from_object, ["metric_config"]) is not None:
414428
setv(
415429
to_object,

vertexai/_genai/types/common.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2479,6 +2479,10 @@ class EvaluationRunMetric(_common.BaseModel):
24792479
metric: Optional[str] = Field(
24802480
default=None, description="""The name of the metric."""
24812481
)
2482+
metric_resource_name: Optional[str] = Field(
2483+
default=None,
2484+
description="""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}""",
2485+
)
24822486
metric_config: Optional[UnifiedMetric] = Field(
24832487
default=None, description="""The unified metric used for evaluation run."""
24842488
)
@@ -2490,6 +2494,9 @@ class EvaluationRunMetricDict(TypedDict, total=False):
24902494
metric: Optional[str]
24912495
"""The name of the metric."""
24922496

2497+
metric_resource_name: Optional[str]
2498+
"""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}"""
2499+
24932500
metric_config: Optional[UnifiedMetricDict]
24942501
"""The unified metric used for evaluation run."""
24952502

@@ -4439,6 +4446,10 @@ class Metric(_common.BaseModel):
44394446
default=None,
44404447
description="""Optional steering instruction parameters for the automated predefined metric.""",
44414448
)
4449+
metric_resource_name: Optional[str] = Field(
4450+
default=None,
4451+
description="""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}""",
4452+
)
44424453

44434454
# Allow extra fields to support metric-specific config fields.
44444455
model_config = ConfigDict(extra="allow")
@@ -4643,6 +4654,9 @@ class MetricDict(TypedDict, total=False):
46434654
metric_spec_parameters: Optional[dict[str, Any]]
46444655
"""Optional steering instruction parameters for the automated predefined metric."""
46454656

4657+
metric_resource_name: Optional[str]
4658+
"""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}"""
4659+
46464660

46474661
MetricOrDict = Union[Metric, MetricDict]
46484662

0 commit comments

Comments (0)