Skip to content

Commit b1e6bdb

Browse files
authored
Script to add mypy per-line ignores (#273)
1 parent 42db624 commit b1e6bdb

File tree

3 files changed

+186
-1
lines changed

3 files changed

+186
-1
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build
77
.tmp.env*
88
.coverage
99
.python-version
10-
*.ipynb
10+
.ipynb_checkpoints
1111
.ruff_cache
1212
.idea
1313
__pycache__

tools/mypy_ignores/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
A simple Jupyter notebook that parses mypy logs from CI and adds per-line `# type: ignore` comments to the source code.
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "d4a46946-bc43-400d-a25f-cdf7564c702e",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import os\n",
11+
"import re\n",
12+
"import datetime\n",
13+
"from collections import defaultdict\n",
14+
"\n",
15+
"import attr"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": null,
21+
"id": "106a80be-5228-4962-8f1a-12a222404e03",
22+
"metadata": {},
23+
"outputs": [],
24+
"source": [
25+
"LOG_FILENAME = \"~/Downloads/mypy_20240130.log\"\n",
26+
"REPO_BASE_PATH = \"~/dev/datalens-backend\"\n",
27+
"\n",
28+
"PKG_RUN_PREFIX = \"Cmd: ['mypy', '--cache-dir=/tmp/mypy_cache']; cwd=data/\"\n",
29+
"CWD_PREFIX = \"/src/\" # \"/data/\"\n",
30+
"pkg_run_cmd_re= re.compile(\n",
31+
" \"\\d{4}\\-\\d{2}\\-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z Cmd: \\['mypy', '\\-\\-cache-dir=/tmp/mypy_cache'\\]; cwd=(.+)\"\n",
32+
")\n",
33+
"\n",
34+
"err_line_re = re.compile(\n",
35+
" \"\\d{4}\\-\\d{2}\\-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z (?P<path>[^:]+):(?P<line>\\d+): (?P<status>\\w+): (?P<msg>.*)\"\n",
36+
")"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"id": "0859bc96-d443-4b3e-a338-616a8ca04978",
43+
"metadata": {},
44+
"outputs": [],
45+
"source": [
46+
"@attr.s\n",
47+
"class ErrInfo:\n",
48+
" pkg: str = attr.ib()\n",
49+
" path: str = attr.ib()\n",
50+
" line: int = attr.ib() # starting from 0\n",
51+
" msg_list: list[str] = attr.ib()\n",
52+
"\n",
53+
" def get_comment(self) -> str:\n",
54+
" # Use only the first error message if there is more than one per line\n",
55+
" return f\" # type: ignore # {datetime.date.today().isoformat()} # TODO: {self.msg_list[0]}\"\n",
56+
"\n",
57+
" def get_full_path(self) -> str:\n",
58+
" return os.path.join(self.pkg, self.path)"
59+
]
60+
},
61+
{
62+
"cell_type": "code",
63+
"execution_count": null,
64+
"id": "4ad5054d-88b7-473f-88ff-4683c4106c5c",
65+
"metadata": {},
66+
"outputs": [],
67+
"source": [
68+
"file_errs: defaultdict[str: list[ErrInfo]] = defaultdict(list)\n",
69+
"\n",
70+
"with open(LOG_FILENAME, \"r\") as log_file:\n",
71+
" pkg_name = None\n",
72+
" for line in log_file:\n",
73+
" line = line.strip()\n",
74+
" run_cmd_match = pkg_run_cmd_re.match(line)\n",
75+
" if run_cmd_match:\n",
76+
" pkg_name = run_cmd_match.groups()[0].replace(CWD_PREFIX, \"\")\n",
77+
"\n",
78+
" match = err_line_re.match(line)\n",
79+
" if match:\n",
80+
" groups = match.groupdict()\n",
81+
"\n",
82+
" if not pkg_name:\n",
83+
" continue\n",
84+
"\n",
85+
" if groups[\"status\"] == \"error\":\n",
86+
" err_info = ErrInfo(\n",
87+
" pkg=pkg_name,\n",
88+
" path=groups[\"path\"],\n",
89+
" line=int(groups[\"line\"]) - 1,\n",
90+
" msg_list=[groups[\"msg\"]],\n",
91+
" )\n",
92+
" assert err_info.line >= 0\n",
93+
"\n",
94+
" file_errs[err_info.get_full_path()].append(err_info)\n",
95+
"\n",
96+
"print(len(file_errs))\n",
97+
"\n",
98+
"# Merge consecutive errors reported on the same line into a single entry\n",
99+
"for file_path, ei_list in file_errs.items():\n",
100+
" new_ei_list = []\n",
101+
" prev_ei = None\n",
102+
" for ei in ei_list:\n",
103+
" if not prev_ei:\n",
104+
" new_ei_list.append(ei)\n",
105+
" prev_ei = ei\n",
106+
" else:\n",
107+
" if ei.line == prev_ei.line:\n",
108+
" prev_ei.msg_list.extend(ei.msg_list)\n",
109+
" else:\n",
110+
" new_ei_list.append(ei)\n",
111+
" prev_ei = ei\n",
112+
" file_errs[file_path] = new_ei_list"
113+
]
114+
},
115+
{
116+
"cell_type": "code",
117+
"execution_count": null,
118+
"id": "b4f45bd1-5c56-4d0d-9937-5fa12350832b",
119+
"metadata": {},
120+
"outputs": [],
121+
"source": [
122+
"paths = list(\n",
123+
" filter(\n",
124+
" lambda t: (\n",
125+
" # t.startswith(\"lib/\")\n",
126+
" # (\n",
127+
" # t.startswith(\"lib/\") or \n",
128+
" # t.startswith(\"app/\")\n",
129+
" # ) and \n",
130+
" # not t.startswith(\"lib/dl_formula/dl_formula/parser/antlr/\")\n",
131+
" ), \n",
132+
" file_errs.keys(),\n",
133+
" )\n",
134+
")\n",
135+
"len(paths)"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"id": "c74069c0-735f-46b1-b23f-8dbec456157c",
142+
"metadata": {},
143+
"outputs": [],
144+
"source": [
145+
"for filename in file_errs.keys():\n",
146+
" path = os.path.join(REPO_BASE_PATH, filename)\n",
147+
" # print(path)\n",
148+
" # print(len(file_errs[filename]))\n",
149+
" with open(path, 'r') as f:\n",
150+
" lines = f.readlines()\n",
151+
"\n",
152+
" for ei in file_errs[filename]:\n",
153+
" line = lines[ei.line].rstrip()\n",
154+
" if \"#\" in line:\n",
155+
" print(f\"!!! Multiple comments in one line: {path}:{ei.line + 1}\")\n",
156+
" lines[ei.line] = f\"{line}{ei.get_comment()}\\n\"\n",
157+
"\n",
158+
" with open(path, 'w') as f:\n",
159+
" f.writelines(lines)"
160+
]
161+
}
162+
],
163+
"metadata": {
164+
"kernelspec": {
165+
"display_name": "Python 3 (ipykernel)",
166+
"language": "python",
167+
"name": "python3"
168+
},
169+
"language_info": {
170+
"codemirror_mode": {
171+
"name": "ipython",
172+
"version": 3
173+
},
174+
"file_extension": ".py",
175+
"mimetype": "text/x-python",
176+
"name": "python",
177+
"nbconvert_exporter": "python",
178+
"pygments_lexer": "ipython3",
179+
"version": "3.10.13"
180+
}
181+
},
182+
"nbformat": 4,
183+
"nbformat_minor": 5
184+
}

0 commit comments

Comments
 (0)