Script to add mypy per-line ignores (#273)

asnytin · web-flow · commit b1e6bdb14733 · 2024-02-02T14:21:30.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -7,7 +7,7 @@ build
 .tmp.env*
 .coverage
 .python-version
-*.ipynb
+.ipynb_checkpoints
 .ruff_cache
 .idea
 __pycache__
diff --git a/tools/mypy_ignores/README.md b/tools/mypy_ignores/README.md
@@ -0,0 +1 @@
+Simple script to parse mypy logs from CI and add per-line ignores to source code.
diff --git a/tools/mypy_ignores/mypy_ignores.ipynb b/tools/mypy_ignores/mypy_ignores.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d4a46946-bc43-400d-a25f-cdf7564c702e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import re\n",
+    "import datetime\n",
+    "from collections import defaultdict\n",
+    "\n",
+    "import attr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "106a80be-5228-4962-8f1a-12a222404e03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "LOG_FILENAME = \"~/Downloads/mypy_20240130.log\"\n",
+    "REPO_BASE_PATH = \"~/dev/datalens-backend\"\n",
+    "\n",
+    "PKG_RUN_PREFIX = \"Cmd: ['mypy', '--cache-dir=/tmp/mypy_cache']; cwd=data/\"\n",
+    "CWD_PREFIX = \"/src/\"  # \"/data/\"\n",
+    "pkg_run_cmd_re= re.compile(\n",
+    "    \"\\d{4}\\-\\d{2}\\-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z Cmd: \\['mypy', '\\-\\-cache-dir=/tmp/mypy_cache'\\]; cwd=(.+)\"\n",
+    ")\n",
+    "\n",
+    "err_line_re = re.compile(\n",
+    "    \"\\d{4}\\-\\d{2}\\-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+Z (?P<path>[^:]+):(?P<line>\\d+): (?P<status>\\w+): (?P<msg>.*)\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0859bc96-d443-4b3e-a338-616a8ca04978",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@attr.s\n",
+    "class ErrInfo:\n",
+    "    pkg: str = attr.ib()\n",
+    "    path: str = attr.ib()\n",
+    "    line: int = attr.ib()  # starting from 0\n",
+    "    msg_list: list[str] = attr.ib()\n",
+    "\n",
+    "    def get_comment(self) -> str:\n",
+    "        # Use only first err message if there are more than one per line\n",
+    "        return f\"  # type: ignore  # {datetime.date.today().isoformat()} # TODO: {self.msg_list[0]}\"\n",
+    "\n",
+    "    def get_full_path(self) -> str:\n",
+    "        return os.path.join(self.pkg, self.path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4ad5054d-88b7-473f-88ff-4683c4106c5c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file_errs: defaultdict[str: list[ErrInfo]] = defaultdict(list)\n",
+    "\n",
+    "with open(LOG_FILENAME, \"r\") as log_file:\n",
+    "    pkg_name = None\n",
+    "    for line in log_file:\n",
+    "        line = line.strip()\n",
+    "        run_cmd_match = pkg_run_cmd_re.match(line)\n",
+    "        if run_cmd_match:\n",
+    "            pkg_name = run_cmd_match.groups()[0].replace(CWD_PREFIX, \"\")\n",
+    "\n",
+    "        match = err_line_re.match(line)\n",
+    "        if match:\n",
+    "            groups = match.groupdict()\n",
+    "\n",
+    "            if not pkg_name:\n",
+    "                continue\n",
+    "\n",
+    "            if groups[\"status\"] == \"error\":\n",
+    "                err_info = ErrInfo(\n",
+    "                    pkg=pkg_name,\n",
+    "                    path=groups[\"path\"],\n",
+    "                    line=int(groups[\"line\"]) - 1,\n",
+    "                    msg_list=[groups[\"msg\"]],\n",
+    "                )\n",
+    "                assert err_info.line >= 0\n",
+    "\n",
+    "                file_errs[err_info.get_full_path()].append(err_info)\n",
+    "\n",
+    "print(len(file_errs))\n",
+    "\n",
+    "# Merge single row errors\n",
+    "for file_path, ei_list in file_errs.items():\n",
+    "    new_ei_list = []\n",
+    "    prev_ei = None\n",
+    "    for ei in ei_list:\n",
+    "        if not prev_ei:\n",
+    "            new_ei_list.append(ei)\n",
+    "            prev_ei = ei\n",
+    "        else:\n",
+    "            if ei.line == prev_ei.line:\n",
+    "                prev_ei.msg_list.extend(ei.msg_list)\n",
+    "            else:\n",
+    "                new_ei_list.append(ei)\n",
+    "                prev_ei = ei\n",
+    "    file_errs[file_path] = new_ei_list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b4f45bd1-5c56-4d0d-9937-5fa12350832b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "paths = list(\n",
+    "    filter(\n",
+    "        lambda t: (\n",
+    "            # t.startswith(\"lib/\")\n",
+    "            # (\n",
+    "            #     t.startswith(\"lib/\") or \n",
+    "            #     t.startswith(\"app/\")\n",
+    "            # ) and \n",
+    "            # not t.startswith(\"lib/dl_formula/dl_formula/parser/antlr/\")\n",
+    "        ), \n",
+    "        file_errs.keys(),\n",
+    "    )\n",
+    ")\n",
+    "len(paths)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c74069c0-735f-46b1-b23f-8dbec456157c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for filename in file_errs.keys():\n",
+    "    path = os.path.join(REPO_BASE_PATH, filename)\n",
+    "    # print(path)\n",
+    "    # print(len(file_errs[filename]))\n",
+    "    with open(path, 'r') as f:\n",
+    "        lines = f.readlines()\n",
+    "\n",
+    "    for ei in file_errs[filename]:\n",
+    "        line = lines[ei.line].rstrip()\n",
+    "        if \"#\" in line:\n",
+    "            print(f\"!!! Multiple comments in one line: {path}:{ei.line + 1}\")\n",
+    "        lines[ei.line] = f\"{line}{ei.get_comment()}\\n\"\n",
+    "\n",
+    "    with open(path, 'w') as f:\n",
+    "        f.writelines(lines)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Simple script to parse mypy logs from CI and add per-line ignores to source code.`