Skip to content

Commit 6a40c03

Browse files
committed
Fix Task Navigation Evaluator Specs
1 parent 46a6f2b commit 6a40c03

File tree

2 files changed

+10
-11
lines changed

2 files changed

+10
-11
lines changed

assets/evaluators/builtin/task_navigation_efficiency/evaluator/_task_navigation_efficiency.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,11 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str, Dict[s
397397
ground_truth_names = []
398398
ground_truth_params_dict: Dict[str, Dict[str, Any]] = {}
399399

400-
if isinstance(ground_truth, tuple) and len(ground_truth) == 2:
400+
if isinstance(ground_truth, list) and all(isinstance(step, str) for step in ground_truth):
401+
# List format: just tool names
402+
ground_truth_names = [step.strip() for step in ground_truth]
403+
use_parameter_matching = False
404+
elif (isinstance(ground_truth, tuple) or isinstance(ground_truth, list)) and len(ground_truth) == 2:
401405
# Tuple format (a two-element list is also accepted): (tool_names, parameters_dict)
402406
tool_names_list, params_dict = ground_truth
403407

@@ -429,10 +433,6 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str, Dict[s
429433
ground_truth_names = [name.strip() for name in tool_names_list]
430434
ground_truth_params_dict = params_dict
431435
use_parameter_matching = True
432-
elif isinstance(ground_truth, list) and all(isinstance(step, str) for step in ground_truth):
433-
# List format: just tool names
434-
ground_truth_names = [step.strip() for step in ground_truth]
435-
use_parameter_matching = False
436436
else:
437437
raise TypeError(
438438
"ground_truth must be a list of strings or a tuple of (list[str], dict[str, dict[str, str]])"

assets/evaluators/builtin/task_navigation_efficiency/spec.yaml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
type: "evaluator"
22
name: "builtin.task_navigation_efficiency"
3-
version: 2
3+
version: 3
44
displayName: "Task-Navigation-Efficiency-Evaluator"
55
description: "Determines whether an agent’s sequence of steps (e.g., tool calls and parameters) matches an optimal or ground truth path for completing a task. Use it to evaluate how effectively an agent follows the expected sequence of actions and executes multi-step workflows."
66
evaluatorType: "builtin"
@@ -29,11 +29,10 @@ dataMappingSchema:
2929
items:
3030
type: "string"
3131
- type: "array"
32-
items:
33-
- type: "array"
34-
items:
35-
type: "string"
36-
- type: "object"
32+
prefixItems: [
33+
{ type: "array", items: { type: "string" } },
34+
{ type: "object" }
35+
]
3736
required: ["response", "ground_truth"]
3837
outputSchema:
3938
task_navigation_efficiency:

0 commit comments

Comments
 (0)