Skip to content

Commit c1214ef

Browse files
committed
wip
1 parent 00136b8 commit c1214ef

File tree

8 files changed

+744
-127
lines changed

8 files changed

+744
-127
lines changed

backend/kernelCI_app/helpers/trees.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def sanitize_tree(
5757
"""Sanitizes a checkout that was returned by a 'treelisting-like' query
5858
5959
Returns a Checkout object"""
60+
6061
build_status = StatusCount(
6162
PASS=checkout["pass_builds"],
6263
FAIL=checkout["fail_builds"],
@@ -87,13 +88,23 @@ def sanitize_tree(
8788
"skip": checkout["skip_boots"],
8889
}
8990

90-
if isinstance(checkout.get("git_commit_tags"), str):
91+
# Has to check if it's a string because sqlite doesn't support ArrayFields.
92+
# So if the query came from sqlite, it will be a string.
93+
git_commit_tags = checkout.get("git_commit_tags")
94+
if isinstance(git_commit_tags, str):
9195
try:
9296
checkout["git_commit_tags"] = json.loads(checkout["git_commit_tags"])
9397
if not isinstance(checkout["git_commit_tags"], list):
9498
checkout["git_commit_tags"] = []
9599
except json.JSONDecodeError:
96100
checkout["git_commit_tags"] = []
101+
elif git_commit_tags and isinstance(git_commit_tags, list):
102+
first_tag = git_commit_tags[0]
103+
if isinstance(first_tag, str):
104+
# The git_commit_tags comes as list[str] on a normal query, but `Checkout`
105+
# expects list[list[str]]. This is a workaround, the queries should *always*
106+
# return a simples list[str].
107+
checkout["git_commit_tags"] = [git_commit_tags]
97108

98109
return Checkout(
99110
**checkout,

backend/kernelCI_app/management/commands/helpers/aggregation_helpers.py

Lines changed: 94 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,20 @@
22
from typing import Sequence
33

44

5-
from django.db import connection
5+
from django.db import connections
66
from kernelCI_app.helpers.logger import out
77
from kernelCI_app.models import (
88
Checkouts,
99
PendingTest,
10-
StatusChoices,
11-
SimplifiedStatusChoices,
10+
PendingBuilds,
1211
Tests,
12+
Builds,
13+
simplify_status,
1314
)
1415
from kernelCI_app.utils import is_boot
15-
from typing import Optional
1616

1717

18-
def simplify_status(status: Optional[StatusChoices]) -> SimplifiedStatusChoices:
19-
if status == StatusChoices.PASS:
20-
return SimplifiedStatusChoices.PASS
21-
elif status == StatusChoices.FAIL:
22-
return SimplifiedStatusChoices.FAIL
23-
else:
24-
return SimplifiedStatusChoices.INCONCLUSIVE
25-
26-
27-
def convert_test(t: Tests) -> PendingTest:
18+
def _convert_test(t: Tests) -> PendingTest:
2819
return PendingTest(
2920
test_id=t.id,
3021
origin=t.origin,
@@ -36,14 +27,78 @@ def convert_test(t: Tests) -> PendingTest:
3627
)
3728

3829

30+
def _update_tree_listing(*, checkouts_instances: Sequence[Checkouts]):
31+
"""
32+
Whenever a checkout updates the latest_checkout table,
33+
we update the tree_listing table, zeroing the counts"""
34+
35+
t0 = time.time()
36+
checkout_values = [
37+
(
38+
checkout.id,
39+
checkout.origin,
40+
checkout.tree_name,
41+
checkout.git_repository_url,
42+
checkout.git_repository_branch,
43+
checkout.git_commit_hash,
44+
checkout.git_commit_name,
45+
checkout.git_commit_tags,
46+
checkout.start_time,
47+
checkout.origin_builds_finish_time,
48+
checkout.origin_tests_finish_time,
49+
)
50+
for checkout in checkouts_instances
51+
]
52+
53+
with connections["default"].cursor() as cursor:
54+
# Set values as 0 when inserting a new tree and update as 0 when tree already exists
55+
cursor.executemany(
56+
"""
57+
INSERT INTO tree_listing (
58+
checkout_id, origin, tree_name,
59+
git_repository_url, git_repository_branch, git_commit_hash,
60+
git_commit_name, git_commit_tags, start_time,
61+
origin_builds_finish_time, origin_tests_finish_time,
62+
pass_builds, fail_builds, inconclusive_builds,
63+
pass_boots, fail_boots, inconclusive_boots,
64+
pass_tests, fail_tests, inconclusive_tests
65+
)
66+
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 0, 0, 0, 0, 0, 0, 0, 0, 0)
67+
ON CONFLICT (origin, tree_name, git_repository_url, git_repository_branch)
68+
DO UPDATE SET
69+
checkout_id = EXCLUDED.checkout_id,
70+
git_commit_hash = EXCLUDED.git_commit_hash,
71+
git_commit_name = EXCLUDED.git_commit_name,
72+
git_commit_tags = EXCLUDED.git_commit_tags,
73+
start_time = EXCLUDED.start_time,
74+
origin_builds_finish_time = EXCLUDED.origin_builds_finish_time,
75+
origin_tests_finish_time = EXCLUDED.origin_tests_finish_time,
76+
pass_builds = 0,
77+
fail_builds = 0,
78+
inconclusive_builds = 0,
79+
pass_boots = 0,
80+
fail_boots = 0,
81+
inconclusive_boots = 0,
82+
pass_tests = 0,
83+
fail_tests = 0,
84+
inconclusive_tests = 0
85+
WHERE tree_listing.start_time < EXCLUDED.start_time
86+
""",
87+
checkout_values,
88+
)
89+
out(
90+
f"upserted {len(checkouts_instances)} tree_listing trees in {time.time() - t0:.3f}s"
91+
)
92+
93+
3994
def aggregate_checkouts(checkouts_instances: Sequence[Checkouts]) -> None:
4095
"""
4196
Insert checkouts on latest_checkouts table,
4297
maintaining only the latest ones for each
4398
(origin, tree_name, git_repository_url, git_repository_branch) combination
4499
"""
45100
t0 = time.time()
46-
values = [
101+
checkout_values = [
47102
(
48103
checkout.id,
49104
checkout.origin,
@@ -55,7 +110,7 @@ def aggregate_checkouts(checkouts_instances: Sequence[Checkouts]) -> None:
55110
for checkout in checkouts_instances
56111
]
57112

58-
with connection.cursor() as cursor:
113+
with connections["default"].cursor() as cursor:
59114
cursor.executemany(
60115
"""
61116
INSERT INTO latest_checkout (
@@ -69,7 +124,7 @@ def aggregate_checkouts(checkouts_instances: Sequence[Checkouts]) -> None:
69124
checkout_id = EXCLUDED.checkout_id
70125
WHERE latest_checkout.start_time < EXCLUDED.start_time
71126
""",
72-
values,
127+
checkout_values,
73128
)
74129
out(f"inserted {len(checkouts_instances)} checkouts in {time.time() - t0:.3f}s")
75130

@@ -80,7 +135,7 @@ def aggregate_tests(
80135
"""Insert tests data on pending_tests table to be processed later"""
81136
t0 = time.time()
82137
pending_tests = (
83-
convert_test(test)
138+
_convert_test(test)
84139
for test in tests_instances
85140
if test.environment_misc and test.environment_misc.get("platform") is not None
86141
)
@@ -95,9 +150,29 @@ def aggregate_tests(
95150
)
96151

97152

98-
def aggregate_checkouts_and_tests(
153+
def aggregate_builds(
154+
build_instances: Sequence[Builds],
155+
) -> None:
156+
"""Insert builds data on pending_builds table to be processed later"""
157+
t0 = time.time()
158+
pending_builds = (PendingBuilds.from_builds(build) for build in build_instances)
159+
160+
if pending_builds:
161+
pending_builds_inserted = PendingBuilds.objects.bulk_create(
162+
pending_builds,
163+
ignore_conflicts=True,
164+
)
165+
out(
166+
f"bulk_create pending_builds: n={len(pending_builds_inserted)} in {time.time() - t0:.3f}s"
167+
)
168+
169+
170+
def aggregate_checkouts_and_pendings(
99171
checkouts_instances: Sequence[Checkouts],
100172
tests_instances: Sequence[Tests],
173+
build_instances: Sequence[Builds],
101174
) -> None:
102175
aggregate_checkouts(checkouts_instances)
176+
_update_tree_listing(checkouts_instances=checkouts_instances)
103177
aggregate_tests(tests_instances)
178+
aggregate_builds(build_instances)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from kernelCI_app.models import TreeListing
2+
3+
4+
def _update_tree_listing(*, tree_listing_objects: list[TreeListing]) -> None:
5+
"""Helper function to centralize and reutilize updates to TreeListing"""
6+
7+
if not tree_listing_objects:
8+
return
9+
10+
TreeListing.objects.bulk_create(
11+
tree_listing_objects,
12+
update_conflicts=True,
13+
unique_fields=[
14+
"origin",
15+
"tree_name",
16+
"git_repository_branch",
17+
"git_repository_url",
18+
],
19+
update_fields=[
20+
"field_timestamp",
21+
"checkout_id",
22+
"origin",
23+
"tree_name",
24+
"git_repository_url",
25+
"git_repository_branch",
26+
"git_commit_hash",
27+
"git_commit_name",
28+
"git_commit_tags",
29+
"start_time",
30+
"origin_builds_finish_time",
31+
"origin_tests_finish_time",
32+
"pass_builds",
33+
"fail_builds",
34+
"done_builds",
35+
"miss_builds",
36+
"skip_builds",
37+
"error_builds",
38+
"null_builds",
39+
"pass_boots",
40+
"fail_boots",
41+
"done_boots",
42+
"miss_boots",
43+
"skip_boots",
44+
"error_boots",
45+
"null_boots",
46+
"pass_tests",
47+
"fail_tests",
48+
"done_tests",
49+
"miss_tests",
50+
"skip_tests",
51+
"error_tests",
52+
"null_tests",
53+
],
54+
)
55+
print(f"Updated {len(tree_listing_objects)} trees in TreeListing", flush=True)

backend/kernelCI_app/management/commands/helpers/kcidbng_ingester.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
)
2727
import kcidb_io
2828
from kernelCI_app.management.commands.helpers.aggregation_helpers import (
29-
aggregate_checkouts_and_tests,
29+
aggregate_checkouts_and_pendings,
3030
)
3131
from django.db import connections, transaction
3232
from kernelCI_app.models import Issues, Checkouts, Builds, Tests, Incidents
@@ -200,9 +200,10 @@ def flush_buffers(
200200
consume_buffer(builds_buf, "builds")
201201
consume_buffer(tests_buf, "tests")
202202
consume_buffer(incidents_buf, "incidents")
203-
aggregate_checkouts_and_tests(
203+
aggregate_checkouts_and_pendings(
204204
checkouts_instances=checkouts_buf,
205205
tests_instances=tests_buf,
206+
build_instances=builds_buf,
206207
)
207208
for filename, filepath in buffer_files:
208209
os.rename(filepath, os.path.join(archive_dir, filename))

0 commit comments

Comments
 (0)