Skip to content

Commit c848a5a

Browse files
feat: async investigate_fragment task; celery results backend (#8428)
* feat: investigate docs asynchronously * refactor: move script to its own js file * fix: adjust polling interval/duration * test: test new task * fix: extra tag/fix whitespace * style: restore whitespace (I hope) * style: black/standard styling * test: fix test of investigate view * test: improve/delint tests
1 parent df27ba9 commit c848a5a

File tree

10 files changed

+386
-124
lines changed

10 files changed

+386
-124
lines changed

ietf/doc/forms.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ class InvestigateForm(forms.Form):
276276
),
277277
min_length=8,
278278
)
279+
task_id = forms.CharField(required=False, widget=forms.HiddenInput)
279280

280281
def clean_name_fragment(self):
281282
disallowed_characters = ["%", "/", "\\", "*"]

ietf/doc/tasks.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
generate_idnits2_rfcs_obsoleted,
3232
update_or_create_draft_bibxml_file,
3333
ensure_draft_bibxml_path_exists,
34+
investigate_fragment,
3435
)
3536

3637

@@ -119,3 +120,11 @@ def generate_draft_bibxml_files_task(days=7, process_all=False):
119120
update_or_create_draft_bibxml_file(event.doc, event.rev)
120121
except Exception as err:
121122
log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")
123+
124+
125+
@shared_task(ignore_result=False)
def investigate_fragment_task(name_fragment: str):
    """Run investigate_fragment() for name_fragment as a shared task.

    ignore_result=False is passed explicitly so that the return value of this
    task is kept and can be fetched later by task id.

    Returns a dict with two keys:
      "name_fragment": the fragment that was investigated, echoed back
      "results": whatever investigate_fragment() returned for it
    """
    findings = investigate_fragment(name_fragment)
    return {"name_fragment": name_fragment, "results": findings}

ietf/doc/tests.py

Lines changed: 121 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3280,44 +3280,152 @@ def test_investigate_fragment(self):
32803280
"draft-this-should-not-be-possible-00.txt",
32813281
)
32823282

3283-
def test_investigate(self):
3283+
def test_investigate_get(self):
    """A GET without a querystring renders the investigate UI and no results."""
    investigate_url = urlreverse("ietf.doc.views_doc.investigate")
    login_testing_unauthorized(self, "secretary", investigate_url)
    response = self.client.get(investigate_url)
    self.assertEqual(response.status_code, 200)
    page = PyQuery(response.content)
    # The form appears exactly once; the results container is absent.
    for selector, expected_count in (("form#investigate", 1), ("div#results", 0)):
        self.assertEqual(len(page(selector)), expected_count)
3291-
r = self.client.post(url, dict(name_fragment="this-is-not-found"))
3292+
3293+
@mock.patch("ietf.doc.views_doc.AsyncResult")
def test_investgate_get_task_id(self, mock_asyncresult):
    """A GET carrying an "id" querystring parameter reports the task status as JSON."""
    # NOTE(review): "investgate" in the method name looks like a typo for
    # "investigate"; the name is deliberately left unchanged here.
    investigate_url = urlreverse("ietf.doc.views_doc.investigate")
    login_testing_unauthorized(self, "secretary", investigate_url)
    poll_url = investigate_url + "?id=a-task-id"

    def poll_and_check(is_ready, expected_status):
        # Configure what AsyncResult(...).ready() reports, poll, and verify the reply.
        mock_asyncresult.return_value.ready.return_value = is_ready
        response = self.client.get(poll_url)
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json(), {"status": expected_status})
        self.assertTrue(mock_asyncresult.called)
        self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))

    poll_and_check(True, "ready")
    # Forget the recorded call so the second poll is checked on its own.
    mock_asyncresult.reset_mock()
    poll_and_check(False, "notready")
3312+
3313+
@mock.patch("ietf.doc.views_doc.investigate_fragment_task")
def test_investigate_post(self, mock_investigate_fragment_task):
    """A POST with a name_fragment but no task_id starts a celery task."""
    investigate_url = urlreverse("ietf.doc.views_doc.investigate")
    login_testing_unauthorized(self, "secretary", investigate_url)
    delay = mock_investigate_fragment_task.delay

    # Invalid input: one fragment that is too short (limit is >= 8 characters),
    # then one fragment per disallowed character. None of them may start a task.
    rejected_fragments = ["short"]
    rejected_fragments.extend(
        f"bad{char}character" for char in ["*", "%", "/", "\\"]
    )
    for fragment in rejected_fragments:
        response = self.client.post(investigate_url, {"name_fragment": fragment})
        self.assertEqual(response.status_code, 200)
        page = PyQuery(response.content)
        self.assertEqual(len(page("#id_name_fragment.is-invalid")), 1)
        self.assertFalse(delay.called)

    # Valid input: the task is started and its id is returned as JSON.
    delay.return_value.id = "a-task-id"
    response = self.client.post(
        investigate_url, {"name_fragment": "this-is-a-valid-fragment"}
    )
    self.assertEqual(response.status_code, 200)
    self.assertTrue(delay.called)
    self.assertEqual(delay.call_args, mock.call("this-is-a-valid-fragment"))
    self.assertEqual(response.json(), {"id": "a-task-id"})
3339+
3340+
@mock.patch("ietf.doc.views_doc.AsyncResult")
def test_investigate_post_task_id(self, mock_asyncresult):
    """A POST with both name_fragment and task_id retrieves the task's results."""
    investigate_url = urlreverse("ietf.doc.views_doc.investigate")
    login_testing_unauthorized(self, "secretary", investigate_url)

    def submit():
        # Every request in this test posts the same fragment / task id pair.
        return self.client.post(
            investigate_url,
            {"name_fragment": "some-fragment", "task_id": "a-task-id"},
        )

    def check_lookup_and_reset():
        # The view must have looked the task up by the posted id.
        self.assertTrue(mock_asyncresult.called)
        self.assertEqual(mock_asyncresult.call_args, mock.call("a-task-id"))

    # A non-successful result first - this could be a failure or a non-existent task id.
    task = mock_asyncresult.return_value
    task.successful.return_value = False
    response = submit()
    self.assertContains(response, "The investigation task failed.", status_code=200)
    check_lookup_and_reset()
    self.assertFalse(task.get.called)
    mock_asyncresult.reset_mock()
    page = PyQuery(response.content)
    # On failure the submitted values are rendered back unchanged.
    self.assertEqual(page("#id_name_fragment").val(), "some-fragment")
    self.assertEqual(page("#id_task_id").val(), "a-task-id")

    # This file was created in setUp. It allows the view to render properly
    # but its location / content don't matter for this test otherwise.
    a_file_that_exists = (
        Path(settings.INTERNET_DRAFT_PATH) / "draft-this-is-active-00.txt"
    )

    # Now the various successful result mixes, as
    # (can_verify, unverifiable_collections, unexpected).
    result_mixes = [
        (set(), set(), set()),
        ({a_file_that_exists}, {a_file_that_exists}, set()),
        (set(), set(), {a_file_that_exists}),
    ]
    task = mock_asyncresult.return_value
    task.successful.return_value = True
    for can_verify, unverifiable_collections, unexpected in result_mixes:
        task.get.return_value = {
            "name_fragment": "different-fragment",
            "results": {
                "can_verify": can_verify,
                "unverifiable_collections": unverifiable_collections,
                "unexpected": unexpected,
            },
        }
        response = submit()
        self.assertEqual(response.status_code, 200)
        check_lookup_and_reset()
        mock_asyncresult.reset_mock()
        page = PyQuery(response.content)
        self.assertEqual(
            page("#id_name_fragment").val(),
            "different-fragment",
            "name_fragment should be reset",
        )
        self.assertEqual(
            page("#id_task_id").val(), "", "task_id should be cleared"
        )
        self.assertEqual(len(page("div#results")), 1)
        # Each table is rendered once when its result set is non-empty, else not at all.
        self.assertEqual(
            len(page("table#authenticated")), 1 if can_verify else 0
        )
        self.assertEqual(
            len(page("table#unverifiable")), 1 if unverifiable_collections else 0
        )
        self.assertEqual(len(page("table#unexpected")), 1 if unexpected else 0)
3312-
r = self.client.post(url, dict(name_fragment="short"))
3313-
self.assertEqual(r.status_code, 200)
3314-
q = PyQuery(r.content)
3315-
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
3316-
for char in ["*", "%", "/", "\\"]:
3317-
r = self.client.post(url, dict(name_fragment=f"bad{char}character"))
3318-
self.assertEqual(r.status_code, 200)
3319-
q = PyQuery(r.content)
3320-
self.assertEqual(len(q("#id_name_fragment.is-invalid")), 1)
3428+
33213429

33223430
class LogIOErrorTests(TestCase):
33233431

ietf/doc/tests_tasks.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
generate_draft_bibxml_files_task,
2121
generate_idnits2_rfcs_obsoleted_task,
2222
generate_idnits2_rfc_status_task,
23+
investigate_fragment_task,
2324
notify_expirations_task,
2425
)
2526

@@ -98,6 +99,18 @@ def test_expire_last_calls_task(self, mock_get_expired, mock_expire):
9899
self.assertEqual(mock_expire.call_args_list[1], mock.call(docs[1]))
99100
self.assertEqual(mock_expire.call_args_list[2], mock.call(docs[2]))
100101

102+
def test_investigate_fragment_task(self):
    """The task calls investigate_fragment() and wraps its result with the fragment."""
    fragment = "some fragment"
    # A unique object, so equality below can only hold if the task passes the
    # patched function's return value straight through.
    canned_results = object()
    patcher = mock.patch(
        "ietf.doc.tasks.investigate_fragment", return_value=canned_results
    )
    with patcher as mock_inv:
        task_output = investigate_fragment_task(fragment)
    self.assertTrue(mock_inv.called)
    self.assertEqual(mock_inv.call_args, mock.call(fragment))
    expected_output = {"name_fragment": fragment, "results": canned_results}
    self.assertEqual(task_output, expected_output)
113+
101114

102115
class Idnits2SupportTests(TestCase):
103116
settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ['DERIVED_DIR']

ietf/doc/views_doc.py

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@
4141

4242
from pathlib import Path
4343

44+
from celery.result import AsyncResult
4445
from django.core.cache import caches
4546
from django.core.exceptions import PermissionDenied
4647
from django.db.models import Max
47-
from django.http import HttpResponse, Http404, HttpResponseBadRequest
48+
from django.http import HttpResponse, Http404, HttpResponseBadRequest, JsonResponse
4849
from django.shortcuts import render, get_object_or_404, redirect
4950
from django.template.loader import render_to_string
5051
from django.urls import reverse as urlreverse
@@ -59,8 +60,9 @@
5960
ConsensusDocEvent, NewRevisionDocEvent, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent,
6061
IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor,
6162
RelatedDocument, RelatedDocHistory)
63+
from ietf.doc.tasks import investigate_fragment_task
6264
from ietf.doc.utils import (augment_events_with_revision,
63-
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id, investigate_fragment,
65+
can_adopt_draft, can_unadopt_draft, get_chartering_type, get_tags_for_stream_id,
6466
needed_ballot_positions, nice_consensus, update_telechat, has_same_ballot,
6567
get_initial_notify, make_notify_changed_event, make_rev_history, default_consensus,
6668
add_events_message_info, get_unicode_document_content,
@@ -2275,16 +2277,67 @@ def idnits2_state(request, name, rev=None):
22752277
content_type="text/plain;charset=utf-8",
22762278
)
22772279

2280+
22782281
@role_required("Secretariat")
def investigate(request):
    """Investigate a name fragment

    This one view answers four kinds of request:

    GET with no "id" querystring parameter renders the UI page with an empty form.

    GET with an "id" querystring parameter polls the status of the async task with that id
    and returns JSON: {"status": "ready"} or {"status": "notready"}.

    POST with a valid form and an empty task_id field starts an async task and returns JSON
    {"id": <task id>}, the id needed to monitor the task for results.

    POST with a valid form and the task_id field set retrieves that task's results and
    renders them, or adds a form error if the task was not successful (it failed, or the id
    is invalid: expired, never existed, etc).

    A POST whose form does not validate is rendered again with the bound form.
    """
    results = None

    if request.method != "POST":
        polled_id = request.GET.get("id", None)
        if polled_id is not None:
            # Status poll: report readiness only, never the result itself
            polled_task = AsyncResult(polled_id)
            return JsonResponse(
                {"status": "ready" if polled_task.ready() else "notready"}
            )
        # Plain GET: serve up an empty form
        form = InvestigateForm()
    else:
        form = InvestigateForm(request.POST)
        if form.is_valid():
            task_id = form.cleaned_data["task_id"]
            if not task_id:
                # No task yet: start the investigation and hand back its id
                name_fragment = form.cleaned_data["name_fragment"]
                started_task = investigate_fragment_task.delay(name_fragment)
                return JsonResponse({"id": started_task.id})

            # A task id was posted: ignore the rest of the form and retrieve the result
            finished_task = AsyncResult(task_id)
            if not finished_task.successful():
                form.add_error(
                    None,
                    "The investigation task failed. Please try again and ask for help if this recurs.",
                )
            else:
                task_output = finished_task.get()
                results = task_output["results"]
                # Work on a copy of the submitted data so it can be edited
                form.data = form.data.copy()
                # Show the fragment the task actually investigated, for consistency
                form.data["name_fragment"] = task_output["name_fragment"]
                # Drop the id so the task result is not requested again
                del form.data["task_id"]

    # Reached for a plain GET, an invalid POST, and a result-retrieval POST
    # (successful or not): render the UI page.
    return render(
        request,
        "doc/investigate.html",
        context={"form": form, "results": results},
    )

ietf/settings.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,7 @@ def skip_unreadable_post(record):
452452
'django_vite',
453453
'django_bootstrap5',
454454
'django_celery_beat',
455+
'django_celery_results',
455456
'corsheaders',
456457
'django_markup',
457458
'oidc_provider',
@@ -1226,7 +1227,9 @@ def skip_unreadable_post(record):
12261227
# https://docs.celeryq.dev/en/stable/userguide/tasks.html#rpc-result-backend-rabbitmq-qpid
12271228
# Results can be retrieved only once and only by the caller of the task. Results will be
12281229
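# lost if the message broker restarts.
# NOTE: the lines above describe the former 'rpc://' result backend; results now go to the 'django-cache' backend configured below.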
1229-
CELERY_RESULT_BACKEND = 'rpc://' # sends a msg via the msg broker
1230+
CELERY_RESULT_BACKEND = 'django-cache' # use a Django cache for results
1231+
CELERY_CACHE_BACKEND = 'celery-results' # which Django cache to use
1232+
CELERY_RESULT_EXPIRES = datetime.timedelta(minutes=5) # how long are results valid? (Default is 1 day)
12301233
CELERY_TASK_IGNORE_RESULT = True # ignore results unless specifically enabled for a task
12311234

12321235
# Meetecho API setup: Uncomment this and provide real credentials to enable
@@ -1309,6 +1312,11 @@ def skip_unreadable_post(record):
13091312
"MAX_ENTRIES": 5000,
13101313
},
13111314
},
1315+
"celery-results": {
1316+
"BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
1317+
"LOCATION": f"{MEMCACHED_HOST}:{MEMCACHED_PORT}",
1318+
"KEY_PREFIX": "ietf:celery",
1319+
},
13121320
}
13131321
else:
13141322
CACHES = {
@@ -1347,6 +1355,11 @@ def skip_unreadable_post(record):
13471355
"MAX_ENTRIES": 5000,
13481356
},
13491357
},
1358+
"celery-results": {
1359+
"BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
1360+
"LOCATION": "app:11211",
1361+
"KEY_PREFIX": "ietf:celery",
1362+
},
13501363
}
13511364

13521365
PUBLISH_IPR_STATES = ['posted', 'removed', 'removed_objfalse']

0 commit comments

Comments
 (0)