Skip to content

Commit 5dfec6d

Browse files
CUA-like agent with tool use and hint support. (#318)
* overlay_utils can return an array if needed.
* exact goal loading in the tool-use-agent
* add tool use cua_like_agent
* remove unused imports
* make extra-dependency user-facing and update dev dependency group
* update CI/CD env installation (dev is default group)
* update makefile to use uv
* run black and remove the unneeded `pip list` step from the code formatting CI/CD.

---------

Co-authored-by: Patrice Bechard <[email protected]>
1 parent 356b0d7 commit 5dfec6d

File tree

10 files changed

+829
-62
lines changed

10 files changed

+829
-62
lines changed

.github/workflows/code_format.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,10 @@ jobs:
2424
enable-cache: true
2525

2626
- name: Set up Python
27-
run: uv python install 3.11
27+
run: uv python install 3.12
2828

2929
- name: Install dependencies
30-
run: uv sync --frozen --extra dev
31-
32-
- name: List packages
33-
run: uv pip list
30+
run: uv sync --frozen
3431

3532
- name: Code Formatting
3633
run: uv run black src/ --check --diff

.github/workflows/darglint.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,7 @@ jobs:
2727
run: uv python install 3.12 # this fails in 3.11
2828

2929
- name: Install dependencies
30-
run: uv sync --frozen --extra dev
31-
32-
- name: List packages
33-
run: uv pip list
30+
run: uv sync --frozen
3431

3532
- name: Darglint checks
3633
run: uv run darglint -v 2 -z short src/

.github/workflows/unit_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
run: uv python install 3.11
3333

3434
- name: Install AgentLab
35-
run: uv sync --frozen --extra dev
35+
run: uv sync --frozen
3636

3737
- name: List packages
3838
run: uv pip list

Makefile

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
.PHONY: test setup miniwob lint stop-miniwob osworld
22

33
setup:
4-
@pip install -e .
5-
@playwright install chromium --with-deps
6-
@python -c 'import nltk; nltk.download("punkt_tab")'
4+
@uv sync --python 3.12
5+
@uv run playwright install chromium --with-deps
6+
@uv run python -c 'import nltk; nltk.download("punkt_tab")'
77

88
miniwob: stop-miniwob
99
@git clone https://github.com/Farama-Foundation/miniwob-plusplus.git || true
1010
@cd miniwob-plusplus && git checkout 7fd85d71a4b60325c6585396ec4f48377d049838
11-
@python -m http.server 8080 --directory miniwob-plusplus/miniwob/html & echo $$! > .miniwob-server.pid
11+
@uv run python -m http.server 8080 --directory miniwob-plusplus/miniwob/html & echo $$! > .miniwob-server.pid
1212
@sleep 3
1313
@echo "MiniWob server started on http://localhost:8080"
1414

@@ -22,14 +22,14 @@ stop-miniwob:
2222
@echo "MiniWob server stopped"
2323

2424
run-tests:
25-
@MINIWOB_URL="http://localhost:8080/miniwob/" pytest -n 5 --durations=10 -m 'not pricy' tests/
25+
@MINIWOB_URL="http://localhost:8080/miniwob/" uv run pytest -n 5 --durations=10 -m 'not pricy' tests/
2626
@echo "Tests completed"
2727

2828
test: setup miniwob check-miniwob run-tests stop-miniwob
2929

3030
lint: setup
31-
@black src/ --check --diff
32-
@darglint -v 2 -z short src/
31+
@uv run black src/ --check --diff
32+
@uv run darglint -v 2 -z short src/
3333

3434
osworld:
3535
@echo "Setting up OSWorld..."
@@ -42,9 +42,9 @@ osworld:
4242
sed -i.bak 's/tqdm~=.*/tqdm/' requirements.txt && \
4343
sed -i.bak 's/pandas~=.*/pandas/' requirements.txt
4444
@echo "Installing OSWorld requirements..."
45-
@cd OSWorld && pip install -r requirements.txt
45+
@cd OSWorld && uv pip install -r requirements.txt
4646
@echo "Installing OSWorld in development mode..."
47-
@cd OSWorld && pip install -e .
47+
@cd OSWorld && uv pip install -e .
4848
@echo "OSWorld setup completed!"
4949
@echo "Next steps:"
5050
@echo "1. Configure your VM (VMware/VirtualBox) according to OSWorld documentation"

pyproject.toml

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,24 +49,20 @@ dependencies = [
4949
"pillow",
5050
"gymnasium>=0.27",
5151
"torch>=2.2.2",
52-
"safetensors>=0.4.0",
53-
"transformers>=4.38.2",
5452
"anthropic>=0.62.0",
5553
"litellm>=1.75.3",
5654
"python-dotenv>=1.1.1",
5755
]
5856

5957
[project.optional-dependencies]
60-
dev = [
61-
"black[jupyter]>=24.2.0",
62-
"blacken-docs",
63-
"pre-commit",
64-
"pytest==7.3.2",
65-
"flaky",
66-
"pytest-xdist",
67-
"pytest-playwright",
58+
hint = [
59+
"sentence-transformers>=5.0.0",
60+
]
61+
transformers = [
62+
"transformers>=4.38.2",
6863
]
6964

65+
7066
[project.urls]
7167
Homepage = "https://github.com/ServiceNow/AgentLab"
7268

@@ -107,9 +103,13 @@ dev = [
107103
"darglint>=1.8.1",
108104
"ipykernel>=6.30.1",
109105
"pip>=25.2",
110-
]
111-
hint = [
112-
"sentence-transformers>=5.0.0",
106+
"black[jupyter]>=24.2.0",
107+
"blacken-docs",
108+
"pre-commit",
109+
"pytest==7.3.2",
110+
"flaky",
111+
"pytest-xdist",
112+
"pytest-playwright",
113113
]
114114

115115

src/agentlab/agents/agent_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from agentlab.analyze import overlay_utils
77
from agentlab.llm.llm_utils import img_to_base_64
8+
import numpy as np
89

910

1011
def draw_mouse_pointer(image: Image.Image, x: int, y: int) -> Image.Image:
@@ -135,7 +136,7 @@ def zoom_webpage(page: Page, zoom_factor: float = 1.5):
135136
return page
136137

137138

138-
def overlay_action(obs, action):
139+
def overlay_action(obs, action, return_array=False):
139140
"""Overlays actions on screenshot in-place"""
140141
act_img = copy.deepcopy(obs["screenshot"])
141142
act_img = Image.fromarray(act_img)
@@ -153,4 +154,7 @@ def overlay_action(obs, action):
153154
pass
154155

155156
overlay_utils.annotate_action(act_img, action, properties=new_obs_properties)
156-
return img_to_base_64(act_img)
157+
if return_array:
158+
return np.array(act_img)
159+
else:
160+
return img_to_base_64(act_img)

0 commit comments

Comments
 (0)