Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/uipath/eval/_helpers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,33 @@
from ..models import ErrorEvaluationResult, EvaluationResult


def is_empty_value(value: Any) -> bool:
    """Return True when *value* is empty or holds only blank strings.

    Cases treated as empty:
    - ``None``
    - a string that is blank or whitespace-only
    - an empty dict, or a dict whose values are all blank strings
    - an empty list

    Anything else (numbers, booleans, lists with elements, dicts with a
    non-string or non-blank value) is considered non-empty.
    """
    if value is None:
        return True

    if isinstance(value, str):
        return value.strip() == ""

    if isinstance(value, dict):
        # An empty dict is empty outright; otherwise every value must be
        # a blank string for the whole dict to count as empty.
        return not value or all(
            isinstance(item, str) and item.strip() == "" for item in value.values()
        )

    if isinstance(value, list):
        return not value

    return False


def auto_discover_entrypoint() -> str:
"""Auto-discover entrypoint from config file.

Expand Down
25 changes: 24 additions & 1 deletion src/uipath/eval/evaluators/legacy_llm_as_judge_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""LLM-as-a-judge evaluator for subjective quality assessment of agent outputs."""

import logging
from typing import Any, Optional

from pydantic import field_validator
Expand All @@ -9,14 +10,23 @@
from ..._utils.constants import COMMUNITY_agents_SUFFIX
from ...platform.chat import UiPathLlmChatService
from ...platform.chat.llm_gateway import RequiredToolChoice
from ..models.models import AgentExecution, EvaluationResult, LLMResponse
from .._helpers.helpers import is_empty_value
from ..models.models import (
AgentExecution,
EvaluationResult,
LLMResponse,
UiPathEvaluationError,
UiPathEvaluationErrorCategory,
)
from .base_legacy_evaluator import (
BaseLegacyEvaluator,
LegacyEvaluationCriteria,
LegacyEvaluatorConfig,
)
from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

logger = logging.getLogger(__name__)


class LegacyLlmAsAJudgeEvaluatorConfig(LegacyEvaluatorConfig):
"""Configuration for legacy LLM-as-a-judge evaluators."""
Expand Down Expand Up @@ -124,6 +134,19 @@ def _create_evaluation_prompt(
self, expected_output: Any, actual_output: Any
) -> str:
"""Create the evaluation prompt for the LLM."""
# Validate that expected output is not empty
if is_empty_value(expected_output):
logger.error(
"❌ EMPTY_EXPECTED_OUTPUT: Expected output is empty or contains only empty values. "
f"Received: {repr(expected_output)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_OUTPUT",
title="Expected output cannot be empty",
detail="The evaluation criteria must contain a non-empty expected output.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.prompt.replace(
self.actual_output_placeholder,
str(actual_output),
Expand Down
19 changes: 19 additions & 0 deletions src/uipath/eval/evaluators/legacy_trajectory_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Trajectory evaluator for analyzing execution paths and decision sequences."""

import logging
from typing import Any, Optional

from opentelemetry.sdk.trace import ReadableSpan
Expand All @@ -10,11 +11,14 @@
from ..._utils.constants import COMMUNITY_agents_SUFFIX
from ...platform.chat import UiPathLlmChatService
from ...platform.chat.llm_gateway import RequiredToolChoice
from .._helpers.helpers import is_empty_value
from ..models.models import (
AgentExecution,
LLMResponse,
NumericEvaluationResult,
TrajectoryEvaluationTrace,
UiPathEvaluationError,
UiPathEvaluationErrorCategory,
)
from .base_legacy_evaluator import (
BaseLegacyEvaluator,
Expand All @@ -23,6 +27,8 @@
)
from .legacy_llm_helpers import create_evaluation_tool, extract_tool_call_response

logger = logging.getLogger(__name__)


class LegacyTrajectoryEvaluatorConfig(LegacyEvaluatorConfig):
"""Configuration for legacy trajectory evaluators."""
Expand Down Expand Up @@ -103,6 +109,19 @@ def _create_evaluation_prompt(
agent_run_history: Any,
) -> str:
"""Create the evaluation prompt for the LLM."""
# Validate that expected agent behavior is not empty
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add tests

if is_empty_value(expected_agent_behavior):
logger.error(
"❌ EMPTY_EXPECTED_AGENT_BEHAVIOR: Expected agent behavior is empty or contains only empty values. "
f"Received: {repr(expected_agent_behavior)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_AGENT_BEHAVIOR",
title="Expected agent behavior cannot be empty",
detail="The evaluation criteria must contain a non-empty expected agent behavior.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.prompt.replace(
self.expected_agent_behavior_placeholder,
str(expected_agent_behavior),
Expand Down
41 changes: 0 additions & 41 deletions src/uipath/eval/evaluators/llm_as_judge_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,34 +136,6 @@ def _get_expected_output(self, evaluation_criteria: T) -> Any:
"""Get the expected output from the evaluation criteria. Must be implemented by concrete evaluator classes."""
pass

def _is_empty_expected_output(self, expected_output: Any) -> bool:
"""Check if the expected output is empty or contains only empty values.

Handles multiple cases:
- None or empty string
- String with only whitespace
- Dict where all values are empty strings or whitespace
- Empty list or dict
"""
if expected_output is None:
return True

if isinstance(expected_output, str):
return not expected_output.strip()

if isinstance(expected_output, dict):
if not expected_output: # Empty dict
return True
# Check if all values are empty strings
return all(
isinstance(v, str) and not v.strip() for v in expected_output.values()
)

if isinstance(expected_output, list):
return len(expected_output) == 0

return False

async def evaluate(
self,
agent_execution: AgentExecution,
Expand Down Expand Up @@ -193,19 +165,6 @@ def _create_evaluation_prompt(
"""Create the evaluation prompt for the LLM."""
expected_output = self._get_expected_output(evaluation_criteria)

# Validate that expected output is not empty
if self._is_empty_expected_output(expected_output):
logger.error(
"❌ EMPTY_EXPECTED_OUTPUT: Expected output is empty or contains only empty values. "
f"Received: {repr(expected_output)}"
)
raise UiPathEvaluationError(
code="EMPTY_EXPECTED_OUTPUT",
title="Expected output cannot be empty",
detail="The evaluation criteria must contain a non-empty expected output or expected agent behavior.",
category=UiPathEvaluationErrorCategory.USER,
)

formatted_prompt = self.evaluator_config.prompt.replace(
self.actual_output_placeholder,
str(self._get_actual_output(agent_execution)),
Expand Down
109 changes: 109 additions & 0 deletions tests/evaluators/test_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""Test module for helper functions in uipath.eval._helpers.helpers."""

from uipath.eval._helpers.helpers import is_empty_value


class TestIsEmptyValue:
    """Tests for the is_empty_value helper.

    Inputs mirror realistic evaluation criteria shapes:
    - expectedOutput: usually a dict such as {"content": "..."} or an empty {}
    - expectedAgentBehavior: usually a free-form string describing behavior
    """

    # --- Empty expectedAgentBehavior (string) cases ---

    def test_empty_string_expected_agent_behavior(self) -> None:
        """An empty expectedAgentBehavior string counts as empty."""
        assert is_empty_value("") is True

    def test_whitespace_only_expected_agent_behavior(self) -> None:
        """Whitespace-only expectedAgentBehavior strings count as empty."""
        for blank in (" ", "  ", "\t", "\n", " \t\n "):
            assert is_empty_value(blank) is True

    def test_valid_expected_agent_behavior(self) -> None:
        """Strings with real content are not empty."""
        for behavior in (
            "The agent should search for weather",
            "Call the get_user tool with id=123",
            " valid behavior ",
        ):
            assert is_empty_value(behavior) is False

    # --- Empty expectedOutput (dict) cases ---

    def test_empty_dict_expected_output(self) -> None:
        """An empty dict expectedOutput counts as empty."""
        # trajectory evaluator shape: {"expectedOutput": {}}
        assert is_empty_value({}) is True

    def test_dict_with_empty_content_field(self) -> None:
        """A dict whose only field is an empty string counts as empty."""
        # llm-as-a-judge shape: {"expectedOutput": {"content": ""}}
        assert is_empty_value({"content": ""}) is True

    def test_dict_with_whitespace_content_field(self) -> None:
        """A dict whose content field is only whitespace counts as empty."""
        for blank in (" ", "\t\n"):
            assert is_empty_value({"content": blank}) is True

    def test_dict_with_multiple_empty_string_fields(self) -> None:
        """A dict where every value is a blank string counts as empty."""
        for criteria in (
            {"content": "", "reasoning": ""},
            {"content": " ", "reasoning": "\t"},
        ):
            assert is_empty_value(criteria) is True

    def test_dict_with_valid_content_field(self) -> None:
        """A dict with a non-blank content field is not empty."""
        for criteria in (
            {"content": "Expected response"},
            {"content": "The answer is 42"},
        ):
            assert is_empty_value(criteria) is False

    def test_dict_with_mixed_empty_and_non_empty_fields(self) -> None:
        """One non-blank value makes the whole dict non-empty."""
        for criteria in (
            {"content": "value", "reasoning": ""},
            {"content": "", "reasoning": "some reason"},
        ):
            assert is_empty_value(criteria) is False

    # --- None case ---

    def test_none_is_empty(self) -> None:
        """None counts as empty."""
        assert is_empty_value(None) is True

    # --- Empty list case ---

    def test_empty_list_is_empty(self) -> None:
        """An empty list counts as empty."""
        assert is_empty_value([]) is True

    def test_non_empty_list_is_not_empty(self) -> None:
        """Any list with elements is not empty, regardless of contents."""
        for items in (["step1", "step2"], [{"content": ""}]):
            assert is_empty_value(items) is False

    # --- Edge cases with non-string dict values ---

    def test_dict_with_non_string_values_is_not_empty(self) -> None:
        """Dicts holding any non-string value are not empty.

        Only all-blank-string values make a dict empty; ints, bools,
        None, lists, and nested dicts all keep it non-empty.
        """
        for inner in (0, False, None, [], {}):
            assert is_empty_value({"content": inner}) is False

    # --- Other types (not typical in eval criteria but handled) ---

    def test_numeric_values_are_not_empty(self) -> None:
        """Numbers — including zero — are never empty."""
        for number in (0, 42, 0.0):
            assert is_empty_value(number) is False

    def test_boolean_values_are_not_empty(self) -> None:
        """Booleans — including False — are never empty."""
        for flag in (False, True):
            assert is_empty_value(flag) is False