Skip to content

Commit ec7ad6e

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add support for referencing registered metrics by resource name in evaluation run API
PiperOrigin-RevId: 878604099
1 parent 79d8e1c commit ec7ad6e

5 files changed

Lines changed: 60 additions & 1 deletion

File tree

tests/unit/vertexai/genai/replays/test_evaluate_instances.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,21 @@ def test_rouge_metric(client):
7979
assert len(response.rouge_results.rouge_metric_values) == 1
8080

8181

82+
def test_evaluate_with_metric_resource_name(client):
83+
metric_res = "projects/my-project/locations/us-central1/evaluationMetrics/my-metric-id"
84+
dataset = types.EvaluationDataset(
85+
eval_dataset_df=pd.DataFrame({
86+
"prompt": ["What is 1+1?"],
87+
"response": ["2"],
88+
})
89+
)
90+
result = client.evals.evaluate(
91+
dataset=dataset,
92+
metrics=[types.Metric(name="my_metric", metric_resource_name=metric_res)],
93+
)
94+
assert result is not None
95+
96+
8297
def test_pointwise_metric(client):
8398
"""Tests the _evaluate_instances method with PointwiseMetricInput."""
8499
instance_dict = {

vertexai/_genai/_evals_common.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from . import _gcs_utils
4646
from . import evals
4747
from . import types
48+
from . import _transformers as t
4849

4950
logger = logging.getLogger(__name__)
5051

@@ -1328,7 +1329,7 @@ def _resolve_dataset_inputs(
13281329

13291330

13301331
def _resolve_evaluation_run_metrics(
1331-
metrics: list[types.EvaluationRunMetric], api_client: Any
1332+
metrics: list[types.EvaluationRunMetric] | list[types.Metric], api_client: Any
13321333
) -> list[types.EvaluationRunMetric]:
13331334
"""Resolves a list of evaluation run metric instances, loading RubricMetric if necessary."""
13341335
if not metrics:
@@ -1361,6 +1362,16 @@ def _resolve_evaluation_run_metrics(
13611362
e,
13621363
)
13631364
raise
1365+
elif isinstance(metric_instance, types.Metric):
1366+
config_dict = t.t_metrics([metric_instance])[0]
1367+
res_name = config_dict.pop("metric_resource_name", None)
1368+
resolved_metrics_list.append(
1369+
types.EvaluationRunMetric(
1370+
metric=metric_instance.name,
1371+
metric_config=config_dict if config_dict else None,
1372+
metric_resource_name=res_name,
1373+
)
1374+
)
13641375
else:
13651376
try:
13661377
metric_name_str = str(metric_instance)

vertexai/_genai/_transformers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ def t_metrics(
3838

3939
for metric in metrics:
4040
metric_payload_item: dict[str, Any] = {}
41+
if hasattr(metric, "metric_resource_name") and metric.metric_resource_name:
42+
metric_payload_item["metric_resource_name"] = metric.metric_resource_name
4143

4244
metric_name = getv(metric, ["name"]).lower()
4345

@@ -79,6 +81,9 @@ def t_metrics(
7981
"return_raw_output": return_raw_output
8082
}
8183
metric_payload_item["pointwise_metric_spec"] = pointwise_spec
84+
elif "metric_resource_name" in metric_payload_item:
85+
# Valid case: Metric is identified by resource name; no inline spec required.
86+
pass
8287
else:
8388
raise ValueError(
8489
f"Unsupported metric type or invalid metric name: {metric_name}"

vertexai/_genai/evals.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,13 @@ def _EvaluationRunMetric_from_vertex(
392392
if getv(from_object, ["metric"]) is not None:
393393
setv(to_object, ["metric"], getv(from_object, ["metric"]))
394394

395+
if getv(from_object, ["metricResourceName"]) is not None:
396+
setv(
397+
to_object,
398+
["metric_resource_name"],
399+
getv(from_object, ["metricResourceName"]),
400+
)
401+
395402
if getv(from_object, ["metricConfig"]) is not None:
396403
setv(
397404
to_object,
@@ -410,6 +417,13 @@ def _EvaluationRunMetric_to_vertex(
410417
if getv(from_object, ["metric"]) is not None:
411418
setv(to_object, ["metric"], getv(from_object, ["metric"]))
412419

420+
if getv(from_object, ["metric_resource_name"]) is not None:
421+
setv(
422+
to_object,
423+
["metricResourceName"],
424+
getv(from_object, ["metric_resource_name"]),
425+
)
426+
413427
if getv(from_object, ["metric_config"]) is not None:
414428
setv(
415429
to_object,

vertexai/_genai/types/common.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2479,6 +2479,10 @@ class EvaluationRunMetric(_common.BaseModel):
24792479
metric: Optional[str] = Field(
24802480
default=None, description="""The name of the metric."""
24812481
)
2482+
metric_resource_name: Optional[str] = Field(
2483+
default=None,
2484+
description="""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}""",
2485+
)
24822486
metric_config: Optional[UnifiedMetric] = Field(
24832487
default=None, description="""The unified metric used for evaluation run."""
24842488
)
@@ -2490,6 +2494,9 @@ class EvaluationRunMetricDict(TypedDict, total=False):
24902494
metric: Optional[str]
24912495
"""The name of the metric."""
24922496

2497+
metric_resource_name: Optional[str]
2498+
"""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}"""
2499+
24932500
metric_config: Optional[UnifiedMetricDict]
24942501
"""The unified metric used for evaluation run."""
24952502

@@ -4439,6 +4446,10 @@ class Metric(_common.BaseModel):
44394446
default=None,
44404447
description="""Optional steering instruction parameters for the automated predefined metric.""",
44414448
)
4449+
metric_resource_name: Optional[str] = Field(
4450+
default=None,
4451+
description="""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}""",
4452+
)
44424453

44434454
# Allow extra fields to support metric-specific config fields.
44444455
model_config = ConfigDict(extra="allow")
@@ -4643,6 +4654,9 @@ class MetricDict(TypedDict, total=False):
46434654
metric_spec_parameters: Optional[dict[str, Any]]
46444655
"""Optional steering instruction parameters for the automated predefined metric."""
46454656

4657+
metric_resource_name: Optional[str]
4658+
"""The resource name of the metric definition. Example: projects/{project}/locations/{location}/evaluationMetrics/{evaluation_metric_id}"""
4659+
46464660

46474661
MetricOrDict = Union[Metric, MetricDict]
46484662

0 commit comments

Comments (0)