@@ -1281,66 +1281,91 @@ def aggregate(
12811281 )
12821282
12831283
1284- class RegisteredMetricHandler (MetricHandler [types .MetricSource ]):
1284+ class RegisteredMetricHandler (MetricHandler [types .Metric ]):
12851285 """Metric handler for registered metrics."""
12861286
12871287 def __init__ (
12881288 self ,
12891289 module : "evals.Evals" ,
1290- metric : Union [ types .MetricSource , types . MetricSourceDict ] ,
1290+ metric : types .Metric ,
12911291 ):
12921292 if isinstance (metric , dict ):
12931293 metric = types .MetricSource (** metric )
12941294 super ().__init__ (module = module , metric = metric )
12951295
1296- # TODO: b/489823454 - Unify _build_request_payload with PredefinedMetricHandler.
12971296 def _build_request_payload (
12981297 self , eval_case : types .EvalCase , response_index : int
12991298 ) -> dict [str , Any ]:
1300- """Builds request payload for registered metric."""
1301- if not self .metric .metric :
1299+ """Builds request payload for registered metric by assembling EvaluationInstance."""
1300+ response_content = _get_response_from_eval_case (
1301+ eval_case , response_index , self .metric_name
1302+ )
1303+
1304+ if not response_content and not getattr (eval_case , "agent_data" , None ):
13021305 raise ValueError (
1303- "Registered metric must have an underlying metric definition."
1306+ f"Response content missing for candidate { response_index } ."
1307+ )
1308+
1309+ reference_instance_data = None
1310+ if eval_case .reference :
1311+ reference_instance_data = PredefinedMetricHandler ._content_to_instance_data (
1312+ eval_case .reference .response
13041313 )
1305- return PredefinedMetricHandler (
1306- self .module , metric = self .metric .metric
1307- )._build_request_payload (eval_case , response_index )
1314+
1315+ extracted_prompt = _get_prompt_from_eval_case (eval_case )
1316+ prompt_instance_data = PredefinedMetricHandler ._content_to_instance_data (
1317+ extracted_prompt
1318+ )
1319+
1320+ instance_payload = types .EvaluationInstance (
1321+ prompt = prompt_instance_data ,
1322+ response = PredefinedMetricHandler ._content_to_instance_data (
1323+ response_content
1324+ ),
1325+ reference = reference_instance_data ,
1326+ rubric_groups = eval_case .rubric_groups ,
1327+ agent_data = PredefinedMetricHandler ._eval_case_to_agent_data (eval_case ),
1328+ )
1329+
1330+ request_payload = {
1331+ "instance" : instance_payload ,
1332+ }
1333+ return request_payload
13081334
13091335 @property
13101336 def metric_name (self ) -> str :
1311- # Resolve name from resource name or internal metric name
1312- if isinstance (self .metric , types .MetricSource ):
1313- if self .metric .metric and self .metric .metric .name :
1314- return self .metric .metric .name
1315- if self .metric .metric_resource_name :
1316- return self .metric .metric_resource_name
1317- return "unknown"
1318- else : # Should be Metric
1319- metric_like = self .metric
1320- if metric_like .name :
1321- return metric_like .name
1322- if metric_like .metric_resource_name :
1323- return metric_like .metric_resource_name
1324- return "unknown"
1337+ return self .metric .name or "unknown_metric"
13251338
13261339 @override
13271340 def get_metric_result (
13281341 self , eval_case : types .EvalCase , response_index : int
13291342 ) -> types .EvalCaseMetricResult :
1330- """Processes a single evaluation case for a registered metric ."""
1343+ """Processes a single evaluation case using a MetricSource reference ."""
13311344 metric_name = self .metric_name
1345+ metric_source = types .MetricSource (
1346+ metric_resource_name = self .metric .metric_resource_name
1347+ )
1348+
13321349 try :
13331350 payload = self ._build_request_payload (eval_case , response_index )
13341351 for attempt in range (_MAX_RETRIES ):
13351352 try :
13361353 api_response = self .module ._evaluate_instances (
1337- metric_sources = [self . metric ],
1354+ metric_sources = [metric_source ],
13381355 instance = payload .get ("instance" ),
13391356 autorater_config = payload .get ("autorater_config" ),
13401357 )
13411358 break
13421359 except genai_errors .ClientError as e :
13431360 if e .code == 429 :
1361+ logger .warning (
1362+ "Resource Exhausted error on attempt %d/%d: %s. Retrying in %s"
1363+ " seconds..." ,
1364+ attempt + 1 ,
1365+ _MAX_RETRIES ,
1366+ e ,
1367+ 2 ** attempt ,
1368+ )
13441369 if attempt == _MAX_RETRIES - 1 :
13451370 return types .EvalCaseMetricResult (
13461371 metric_name = metric_name ,
@@ -1377,7 +1402,6 @@ def aggregate(
13771402 self , eval_case_metric_results : list [types .EvalCaseMetricResult ]
13781403 ) -> types .AggregatedMetricResult :
13791404 """Aggregates the metric results for a registered metric."""
1380- logger .debug ("Aggregating results for registered metric: %s" , self .metric_name )
13811405 return _default_aggregate_scores (
13821406 self .metric_name , eval_case_metric_results , calculate_pass_rate = True
13831407 )
0 commit comments