def get_targeted_field(self, obj: Any, *, default: Any = ...) -> Any:
    """Return the part of ``obj`` selected by ``self.target_output_key``.

    When ``target_output_key`` is unset/empty or is the wildcard ``"*"``,
    ``obj`` is returned untouched. Otherwise the key is handed to
    ``resolve_output_path`` as a dot-notation path into ``obj``.

    Args:
        obj: The value to resolve the configured key against (for example
            an agent output or an expected output).
        default: Keyword-only value to return when the path cannot be
            resolved. Left at its ``...`` sentinel, the original ``obj`` is
            returned instead, which is the behavior every existing caller
            gets. Evaluators that must not compare a resolved field against
            a whole unresolved object can pass a neutral placeholder such
            as ``{}``.

    Returns:
        The resolved sub-value; ``obj`` itself when no specific key is
        configured; or ``default`` / ``obj`` when resolution fails.
    """
    key = self.target_output_key

    # No key, or the wildcard, means "evaluate the whole object".
    if not key or key == "*":
        return obj

    try:
        return resolve_output_path(obj, key)
    except (KeyError, IndexError, TypeError):
        # Best-effort lookup: an unresolvable path is not an error here.
        # ``...`` marks "no default supplied" so that ``None`` and ``{}``
        # remain usable as explicit fallbacks.
        return obj if default is ... else default
+40,8 @@ async def evaluate( Returns: EvaluationResult: Boolean result indicating exact match (True/False) """ - actual_output = agent_execution.agent_output - expected_output = evaluation_criteria.expected_output - - if self.target_output_key and self.target_output_key != "*": - if isinstance(actual_output, dict) and isinstance(expected_output, dict): - actual_resolved = True - expected_resolved = True - - try: - actual_output = resolve_output_path( - actual_output, self.target_output_key - ) - except (KeyError, IndexError, TypeError): - actual_resolved = False - - try: - expected_output = resolve_output_path( - expected_output, self.target_output_key - ) - except (KeyError, IndexError, TypeError): - expected_resolved = False - - if not actual_resolved or not expected_resolved: - actual_output = expected_output = {} + actual_output = self.get_targeted_field(agent_execution.agent_output) + expected_output = self.get_targeted_field(evaluation_criteria.expected_output) return BooleanEvaluationResult( score=self._canonical_json(actual_output) diff --git a/packages/uipath/src/uipath/eval/evaluators/legacy_json_similarity_evaluator.py b/packages/uipath/src/uipath/eval/evaluators/legacy_json_similarity_evaluator.py index 70fe28b2e..5e3fd4ed9 100644 --- a/packages/uipath/src/uipath/eval/evaluators/legacy_json_similarity_evaluator.py +++ b/packages/uipath/src/uipath/eval/evaluators/legacy_json_similarity_evaluator.py @@ -3,7 +3,6 @@ import math from typing import Any, Tuple, TypeVar -from .._helpers.output_path import resolve_output_path from ..models import EvaluationResult, NumericEvaluationResult from ..models.models import AgentExecution from .base_legacy_evaluator import LegacyEvaluationCriteria, LegacyEvaluatorConfig @@ -47,23 +46,8 @@ async def evaluate( Returns: EvaluationResult: Numerical score between 0-100 indicating similarity """ - actual_output = agent_execution.agent_output - expected_output = evaluation_criteria.expected_output - - if self.target_output_key 
and self.target_output_key != "*": - try: - actual_output = resolve_output_path( - actual_output, self.target_output_key - ) - except (KeyError, IndexError, TypeError): - actual_output = {} - - try: - expected_output = resolve_output_path( - expected_output, self.target_output_key - ) - except (KeyError, IndexError, TypeError): - expected_output = {} + actual_output = self.get_targeted_field(agent_execution.agent_output) + expected_output = self.get_targeted_field(evaluation_criteria.expected_output) return NumericEvaluationResult( score=self._compare_json(expected_output, actual_output) diff --git a/packages/uipath/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py b/packages/uipath/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py index 5b9a8ee1c..999449d0e 100644 --- a/packages/uipath/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py +++ b/packages/uipath/src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py @@ -12,7 +12,6 @@ from ..._utils.constants import COMMUNITY_agents_SUFFIX from .._execution_context import eval_set_run_id_context from .._helpers.helpers import is_empty_value -from .._helpers.output_path import resolve_output_path from ..models import NumericEvaluationResult from ..models.models import ( AgentExecution, @@ -125,23 +124,8 @@ async def evaluate( if self.llm is None: self._initialize_llm() - actual_output = agent_execution.agent_output - expected_output = evaluation_criteria.expected_output - - if self.target_output_key and self.target_output_key != "*": - try: - actual_output = resolve_output_path( - actual_output, self.target_output_key - ) - except (KeyError, IndexError, TypeError): - pass - - try: - expected_output = resolve_output_path( - expected_output, self.target_output_key - ) - except (KeyError, IndexError, TypeError): - pass + actual_output = self.get_targeted_field(agent_execution.agent_output) + expected_output = self.get_targeted_field(evaluation_criteria.expected_output) # Create the 
evaluation prompt evaluation_prompt = self._create_evaluation_prompt(