Fix Task Navigation Evaluator Specs

m7md7sien · m7md7sien · commit 6a40c03b66d6 · 2025-11-03T18:35:48.000+02:00
diff --git a/assets/evaluators/builtin/task_navigation_efficiency/evaluator/_task_navigation_efficiency.py b/assets/evaluators/builtin/task_navigation_efficiency/evaluator/_task_navigation_efficiency.py
@@ -397,7 +397,11 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str, Dict[s
         ground_truth_names = []
         ground_truth_params_dict: Dict[str, Dict[str, Any]] = {}
 
-        if isinstance(ground_truth, tuple) and len(ground_truth) == 2:
+        if isinstance(ground_truth, list) and all(isinstance(step, str) for step in ground_truth):
+            # List format: just tool names
+            ground_truth_names = [step.strip() for step in ground_truth]
+            use_parameter_matching = False
+        elif (isinstance(ground_truth, tuple) or isinstance(ground_truth, list)) and len(ground_truth) == 2:
             # Tuple format: (tool_names, parameters_dict)
             tool_names_list, params_dict = ground_truth
 
@@ -429,10 +433,6 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str, Dict[s
             ground_truth_names = [name.strip() for name in tool_names_list]
             ground_truth_params_dict = params_dict
             use_parameter_matching = True
-        elif isinstance(ground_truth, list) and all(isinstance(step, str) for step in ground_truth):
-            # List format: just tool names
-            ground_truth_names = [step.strip() for step in ground_truth]
-            use_parameter_matching = False
         else:
             raise TypeError(
                 "ground_truth must be a list of strings or a tuple of (list[str], dict[str, dict[str, str]])"
diff --git a/assets/evaluators/builtin/task_navigation_efficiency/spec.yaml b/assets/evaluators/builtin/task_navigation_efficiency/spec.yaml
@@ -1,6 +1,6 @@
 type: "evaluator"
 name: "builtin.task_navigation_efficiency"
-version: 2
+version: 3
 displayName: "Task-Navigation-Efficiency-Evaluator"
 description: "Determines whether an agent’s sequence of steps (e.g., tool calls and parameters) matches an optimal or ground truth path for completing a task. Use it to evaluate how effectively an agent follows expected sequence of actions and executes multi-step workflows."
 evaluatorType: "builtin"
@@ -29,11 +29,10 @@ dataMappingSchema:
           items:
             type: "string"
         - type: "array"
-          items:
-            - type: "array"
-              items:
-                type: "string"
-            - type: "object"
+          prefixItems: [
+            { type: "array", items: { type: "string" } },
+            { type: "object" }
+          ]
   required: ["response", "ground_truth"]
 outputSchema:
   task_navigation_efficiency: