From 7c95bec464dd06821c926f4b37ea806c00e61526 Mon Sep 17 00:00:00 2001 From: Dave O'Connor Date: Thu, 11 Dec 2025 12:13:03 -0800 Subject: [PATCH 1/5] Release report logging improvements --- libraries/admin.py | 1 + libraries/tasks.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/libraries/admin.py b/libraries/admin.py index 9a5c78f0..14484146 100644 --- a/libraries/admin.py +++ b/libraries/admin.py @@ -189,6 +189,7 @@ def get_context_data(self, **kwargs): def generate_report(self): uri = f"{settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL}://{self.request.get_host()}" + logger.info("Queuing release report") generate_release_report.delay( user_id=self.request.user.id, params=self.request.GET, diff --git a/libraries/tasks.py b/libraries/tasks.py index e1bedf85..45664c73 100644 --- a/libraries/tasks.py +++ b/libraries/tasks.py @@ -253,6 +253,7 @@ def update_issues(clean=False): @app.task def generate_release_report(user_id: int, params: dict, base_uri: str = None): """Generate a release report asynchronously and save it in RenderedContent.""" + logger.info(f"Starting generate_release_report task, {settings.LOCAL_DEVELOPMENT=}") from libraries.forms import CreateReportForm form = CreateReportForm(params) @@ -265,7 +266,9 @@ def generate_release_report(user_id: int, params: dict, base_uri: str = None): created_by_id=user_id, report_configuration_id=params.get("report_configuration"), ) + logger.info(f"Saving release_report {params.get('report_configuration')=}") release_report.save() + logger.info(f"generate release report pdf {release_report.pk=}") generate_release_report_pdf.delay( release_report.pk, html=html, publish=params.get("publish") ) From ef46787dd8968d83d09923976019cee7d2ea09fd Mon Sep 17 00:00:00 2001 From: Dave O'Connor Date: Thu, 11 Dec 2025 12:15:27 -0800 Subject: [PATCH 2/5] Release report: match new mailing list stats source, stop words; adjust downloads table layout to fit --- libraries/tasks.py | 78 +++++++++++++--------- reports/generation.py | 5 +- 
templates/admin/release_report_detail.html | 6 +- 3 files changed, 52 insertions(+), 37 deletions(-) diff --git a/libraries/tasks.py b/libraries/tasks.py index 45664c73..73b04f4a 100644 --- a/libraries/tasks.py +++ b/libraries/tasks.py @@ -19,7 +19,7 @@ CommitAuthor, ReleaseReport, ) -from mailing_list.models import EmailData, PostingData, SubscriptionData +from mailing_list.models import EmailData, PostingData from reports.generation import ( generate_algolia_words, generate_wordcloud, @@ -591,40 +591,52 @@ def get_mailing_list_stats(prior_version_id: int, version_id: int): @shared_task def get_new_subscribers_stats(start_date: date, end_date: date): - data = ( - SubscriptionData.objects.filter( - subscription_dt__gte=start_date, - subscription_dt__lte=end_date, - list="boost", - ) - .annotate( - week=ExtractWeek("subscription_dt"), - iso_year=ExtractIsoYear("subscription_dt"), - ) - .values("iso_year", "week") - .annotate(count=Count("id")) - .order_by("iso_year", "week") - ) + """Get new subscribers statistics for HyperKitty mailing list using raw SQL.""" + import psycopg2 + from django.conf import settings - # Convert data into a dict for easy lookup - counts_by_week = {(row["iso_year"], row["week"]): row["count"] for row in data} + conn = psycopg2.connect(settings.HYPERKITTY_DATABASE_URL) - # Iterate through every ISO week in the date range - current = start_date - seen = set() - chart_data = [] - while current <= end_date: - iso_year, iso_week, _ = current.isocalendar() - key = (iso_year, iso_week) - if key not in seen: # skip duplicate weeks in the same loop - seen.add(key) - year_suffix = str(iso_year)[2:] - label = f"{iso_week} ({year_suffix})" - count = counts_by_week.get(key, 0) - chart_data.append({"x": label, "y": count}) - current += timedelta(days=7) # hop by weeks - - return chart_data + try: + with conn.cursor() as cursor: + cursor.execute( + """ + SELECT + EXTRACT(ISOYEAR FROM date_joined) as iso_year, + EXTRACT(WEEK FROM date_joined) as iso_week, 
+ COUNT(*) as count + FROM auth_user + WHERE date_joined::date >= %s + AND date_joined::date <= %s + GROUP BY iso_year, iso_week + ORDER BY iso_year, iso_week + """, + [start_date, end_date], + ) + + data = cursor.fetchall() + + # Convert data into a dict for easy lookup + counts_by_week = {(int(row[0]), int(row[1])): row[2] for row in data} + + # Iterate through every ISO week in the date range + current = start_date + seen = set() + chart_data = [] + while current <= end_date: + iso_year, iso_week, _ = current.isocalendar() + key = (iso_year, iso_week) + if key not in seen: # skip duplicate weeks in the same loop + seen.add(key) + year_suffix = str(iso_year)[2:] + label = f"{iso_week} ({year_suffix})" + count = counts_by_week.get(key, 0) + chart_data.append({"x": label, "y": count}) + current += timedelta(days=7) # hop by weeks + + return chart_data + finally: + conn.close() @shared_task diff --git a/reports/generation.py b/reports/generation.py index d50d37ff..8fe4bf03 100644 --- a/reports/generation.py +++ b/reports/generation.py @@ -44,8 +44,11 @@ def generate_mailinglist_words( word_frequencies = {} for content in get_mail_content(version, prior_version): for key, val in WordCloud().process_text(content).items(): + key = key.strip() if len(key) < 2: continue + if key in SiteSettings.load().wordcloud_ignore_set: + continue key_lower = key.lower() if key_lower not in word_frequencies: word_frequencies[key_lower] = 0 @@ -91,7 +94,7 @@ def generate_wordcloud( background_color=None, width=width, height=height, - stopwords=STOPWORDS | SiteSettings.load().wordcloud_ignore_set, + stopwords=STOPWORDS, font_path=font_full_path, ) word_frequencies = boost_normalize_words( diff --git a/templates/admin/release_report_detail.html b/templates/admin/release_report_detail.html index d49f50a8..e38cf20a 100644 --- a/templates/admin/release_report_detail.html +++ b/templates/admin/release_report_detail.html @@ -94,12 +94,12 @@

{% endif %} {% if downloads %} - +
@@ -113,7 +113,7 @@

{{ os }} {% endif %} -

From d468c89f0d3416320a0bc06c97cda9d1d2a4deac Mon Sep 17 00:00:00 2001 From: Dave O'Connor Date: Thu, 11 Dec 2025 17:12:06 -0800 Subject: [PATCH 3/5] Rework of release report task workflow to work under prefork --- libraries/admin.py | 66 +++++++++++++++++++++++--- libraries/forms.py | 114 +++++++++++++++++++++++++++++++++++++++++++++ libraries/tasks.py | 32 +++++++++++-- 3 files changed, 200 insertions(+), 12 deletions(-) diff --git a/libraries/admin.py b/libraries/admin.py index 14484146..a0dc4fec 100644 --- a/libraries/admin.py +++ b/libraries/admin.py @@ -1,4 +1,5 @@ import structlog +from datetime import date from django.conf import settings from django.contrib import admin, messages from django.core.exceptions import ValidationError @@ -12,9 +13,11 @@ from django.shortcuts import redirect from django.views.generic import TemplateView from django import forms +from celery import chain, group from core.admin_filters import StaffUserCreatedByFilter from libraries.forms import CreateReportForm, CreateReportFullForm +from reports.generation import determine_versions from versions.models import Version from versions.tasks import import_all_library_versions from .filters import ReportConfigurationFilter @@ -31,15 +34,22 @@ WordcloudMergeWord, ) from .tasks import ( + count_mailinglist_contributors, + count_commit_contributors_totals, generate_library_report, + generate_mailinglist_cloud, + generate_release_report_with_stats, + generate_search_cloud, + get_mailing_list_stats, + get_new_contributors_count, + get_new_subscribers_stats, + synchronize_commit_author_user_data, update_authors_and_maintainers, update_commit_author_github_data, update_commits, update_issues, update_libraries, update_library_version_documentation_urls_all_versions, - generate_release_report, - synchronize_commit_author_user_data, ) from .utils import generate_release_report_filename @@ -189,12 +199,52 @@ def get_context_data(self, **kwargs): def generate_report(self): uri = 
f"{settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL}://{self.request.get_host()}" - logger.info("Queuing release report") - generate_release_report.delay( - user_id=self.request.user.id, - params=self.request.GET, - base_uri=uri, + logger.info("Queuing release report workflow") + + # Get the report configuration to determine version parameters + form = self.get_form() + if not form.is_valid(): + return + + report_configuration = form.cleaned_data["report_configuration"] + + # NOTE TO FUTURE DEVS: remember to account for the fact that a report + # configuration may not match with a real version in frequent cases where + # reports are generated before the release version has been created. + (report_before_release, prior_version, version) = determine_versions( + report_configuration.version + ) + + # trigger stats tasks first to run in parallel using group, then chain the final + # report generation task + stats_tasks = group( + [ + count_mailinglist_contributors.s(prior_version.pk, version.pk), + get_mailing_list_stats.s(prior_version.pk, version.pk), + count_commit_contributors_totals.s(version.pk, prior_version.pk), + get_new_subscribers_stats.s( + prior_version.release_date, version.release_date or date.today() + ), + generate_mailinglist_cloud.s(prior_version.pk, version.pk), + # if the report is based on a live version, look for stats for that + # version, otherwise use the stats for the prior (live) version + generate_search_cloud.s( + prior_version.pk if report_before_release else version.pk + ), + get_new_contributors_count.s(version.pk), + ] + ) + + # chain stats collection with final report generation + workflow = chain( + stats_tasks, + generate_release_report_with_stats.s( + self.request.user.id, + self.request.GET, + uri, + ), ) + workflow.apply_async() def locked_publish_check(self): form = self.get_form() @@ -246,6 +296,8 @@ class LibraryReportView(ReleaseReportView): report_type = "library report" def generate_report(self): + # For library reports, we don't 
need a complex stats workflow since + # CreateReportFullForm doesn't use the same async stats pattern generate_library_report.delay(self.request.GET) diff --git a/libraries/forms.py b/libraries/forms.py index 86be3570..92f3bd9e 100644 --- a/libraries/forms.py +++ b/libraries/forms.py @@ -411,6 +411,120 @@ def get_stats(self): "slack": slack_stats, } + def generate_context( + self, report_configuration: ReportConfiguration, stats_results: dict + ): + committee_members = report_configuration.financial_committee_members.all() + + # NOTE TO FUTURE DEVS: remember to account for the fact that a report + # configuration may not match with a real version in frequent cases where + # reports are generated before the release version has been created. + (report_before_release, prior_version, version) = determine_versions( + report_configuration.version + ) + + # Unpack stats_results in the same order as tasks were defined in the workflow + ( + (mailinglist_contributor_release_count, mailinglist_contributor_new_count), + (mailinglist_post_stats, total_mailinglist_count), + (commit_contributors_release_count, commit_contributors_new_count), + mailinglist_new_subscribers_stats, + ( + mailinglist_words, + mailinglist_wordcloud_base64, + mailinglist_wordcloud_top_words, + ), + (search_wordcloud_base64, search_wordcloud_top_words, search_stats), + global_contributors_new_count, + ) = stats_results + + # Compute the synchronous stats that don't require async tasks + commit_count, version_commit_count = get_commit_counts(version) + top_libraries_for_version = get_top_libraries_for_version(version) + top_libraries_by_name = get_libraries_by_name(version) + library_order = self._get_library_order(top_libraries_by_name) + # TODO: we may in future need to find a way to show the removed libraries, for + # now it's not needed. 
In that case the distinction between running this on a + # ReportConfiguration with a real 'version' entry vs one that instead uses 'master' + # will need to be considered + libraries = get_libraries(library_order) + new_libraries = libraries.exclude( + library_version__version__release_date__lte=prior_version.release_date + ).prefetch_related("authors") + top_contributors = get_top_contributors_for_version(version) + mailinglist_counts = get_mailinglist_counts(version) + lines_added, lines_removed = lines_changes_count(version) + opened_issues_count, closed_issues_count = get_issues_counts( + prior_version, version + ) + # TODO: connected to above todo, add removed_libraries.count() + removed_library_count = 0 + + library_data = get_library_data(library_order, prior_version.pk, version.pk) + slack_stats = get_slack_stats(prior_version, version) + + library_index_library_data = get_libraries_for_index(library_data, version) + batched_library_data = conditional_batched( + library_data, + 2, + lambda x: x.get("top_contributors_release").count() + <= RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD, + ) + git_graph_data = get_git_graph_data(prior_version, version) + download = get_download_links(version) + + return { + "committee_members": committee_members, + "lines_added": lines_added, + "lines_removed": lines_removed, + "version": version, + "report_configuration": report_configuration, + "prior_version": prior_version, + "opened_issues_count": opened_issues_count, + "closed_issues_count": closed_issues_count, + "mailinglist_wordcloud_base64": mailinglist_wordcloud_base64, + "mailinglist_wordcloud_frequencies": mailinglist_wordcloud_top_words, + "mailinglist_counts": mailinglist_counts, + "mailinglist_total": total_mailinglist_count or 0, + "mailinglist_contributor_release_count": mailinglist_contributor_release_count, # noqa: E501 + "mailinglist_contributor_new_count": mailinglist_contributor_new_count, + "mailinglist_post_stats": mailinglist_post_stats, + 
"mailinglist_new_subscribers_stats": mailinglist_new_subscribers_stats, + "mailinglist_charts_start_year": prior_version.release_date.year, + "search_wordcloud_base64": search_wordcloud_base64, + "search_wordcloud_frequencies": search_wordcloud_top_words, + "search_stats": search_stats, + "commit_contributors_release_count": commit_contributors_release_count, + "commit_contributors_new_count": commit_contributors_new_count, + "global_contributors_new_count": global_contributors_new_count, + "commit_count": commit_count, + "version_commit_count": version_commit_count, + "top_contributors_release_overall": top_contributors, + "library_data": library_data, + "new_libraries": new_libraries, + "batched_library_data": batched_library_data, + "top_libraries_for_version": top_libraries_for_version, + "library_count": libraries.count(), + "library_index_libraries": library_index_library_data, + "added_library_count": new_libraries.count(), + "removed_library_count": removed_library_count, + "downloads": download, + "contribution_box_graph": git_graph_data, + "slack_channels": get_slack_channels(), + "slack": slack_stats, + } + + def render_with_stats(self, stats_results, base_uri=None): + """Render HTML with pre-computed stats results""" + context = self.generate_context( + self.cleaned_data["report_configuration"], stats_results + ) + if base_uri: + context["base_uri"] = base_uri + html = render_to_string(self.html_template_name, context) + self.cache_set(html) + return html + class CommitAuthorEmailForm(Form): """ diff --git a/libraries/tasks.py b/libraries/tasks.py index 73b04f4a..d3a10901 100644 --- a/libraries/tasks.py +++ b/libraries/tasks.py @@ -251,13 +251,35 @@ def update_issues(clean=False): @app.task -def generate_release_report(user_id: int, params: dict, base_uri: str = None): - """Generate a release report asynchronously and save it in RenderedContent.""" - logger.info(f"Starting generate_release_report task, {settings.LOCAL_DEVELOPMENT=}") +def 
generate_release_report_with_stats(stats_results, user_id, params, base_uri=None): + """Wrapper task that reorders arguments for workflow mode.""" + return generate_release_report(user_id, params, base_uri, stats_results) + + +@app.task +def generate_release_report(user_id, params, base_uri=None, stats_results=None): + """Generate a release report asynchronously and save to RenderedContent/PDF + + Args: + user_id: ID of the user creating the report + params: Form parameters for report configuration + base_uri: Base URI for the report (optional) + stats_results: Pre-collected stats from workflow (optional) + """ + logger.info(f"Starting generate_release_report {settings.LOCAL_DEVELOPMENT=}") + from libraries.forms import CreateReportForm form = CreateReportForm(params) - html = form.cache_html(base_uri=base_uri) + if not form.is_valid(): + logger.error(f"Form validation failed, {form.errors}") + return None + + if stats_results: + html = form.render_with_stats(stats_results, base_uri=base_uri) + else: + html = form.cache_html(base_uri=base_uri) + # override the base uri to reference the internal container for local dev if settings.LOCAL_DEVELOPMENT: html = update_base_tag(html, DOCKER_CONTAINER_URL_WEB) @@ -333,7 +355,7 @@ def generate_library_report(params): from libraries.forms import CreateReportFullForm form = CreateReportFullForm(params) - form.cache_html() + return form.cache_html() @app.task From 5c80567833dc14c6aaf7f47b391ad54bc2542cad Mon Sep 17 00:00:00 2001 From: Dave O'Connor Date: Thu, 11 Dec 2025 17:12:49 -0800 Subject: [PATCH 4/5] Disable release report generation on release_tasks completion as discussed with Rob --- libraries/management/commands/release_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libraries/management/commands/release_tasks.py b/libraries/management/commands/release_tasks.py index f53bbc0a..58905fe5 100644 --- a/libraries/management/commands/release_tasks.py +++ 
b/libraries/management/commands/release_tasks.py @@ -59,7 +59,7 @@ def set_tasks(self): Action("Updating slack activity buckets", ["fetch_slack_activity"]), Action("Updating website statistics", self.update_website_statistics), Action("Importing mailing list counts", self.import_ml_counts), - Action("Generating report", self.generate_report), + # Action("Generating report", self.generate_report), ] def import_versions(self): From f0f3071321e00032e9f980fa1a653cbd27f2b61f Mon Sep 17 00:00:00 2001 From: Dave O'Connor Date: Thu, 11 Dec 2025 19:00:57 -0800 Subject: [PATCH 5/5] Adjusted filtering on queries to precisely select releases by name when provided; Made fully_completed marking more specific; Added logging improvements --- .../management/commands/update_maintainers.py | 2 +- .../commands/import_archives_release_data.py | 3 ++- versions/tasks.py | 26 ++++++++++++++----- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/libraries/management/commands/update_maintainers.py b/libraries/management/commands/update_maintainers.py index cb2f6c75..f57c0761 100644 --- a/libraries/management/commands/update_maintainers.py +++ b/libraries/management/commands/update_maintainers.py @@ -29,7 +29,7 @@ def command(library_name, release): library_versions = library_versions.filter(library__name__iexact=library_name) if release is not None: - library_versions = library_versions.filter(version__name__icontains=release) + library_versions = library_versions.filter(version__name=release) for library_version in library_versions.order_by( "-version__name", "-library__name" diff --git a/versions/management/commands/import_archives_release_data.py b/versions/management/commands/import_archives_release_data.py index 69983db3..87bbfe8d 100644 --- a/versions/management/commands/import_archives_release_data.py +++ b/versions/management/commands/import_archives_release_data.py @@ -43,7 +43,8 @@ def command(release: str, new: bool): last_release = settings.MIN_ARCHIVES_RELEASE if 
release: - versions = Version.objects.filter(name__icontains=release) + name = f"boost-{release}" if release not in ["master", "develop"] else release + versions = [Version.objects.get(name=name)] elif new: versions = [Version.objects.most_recent()] else: diff --git a/versions/tasks.py b/versions/tasks.py index 59b9a131..59943f12 100644 --- a/versions/tasks.py +++ b/versions/tasks.py @@ -73,7 +73,7 @@ def import_versions( if purge_after: logger.info("linking fastly purge") task_group.link(purge_fastly_release_cache.s()) - task_group.link(mark_fully_completed.s()) + task_group.link(mark_fully_completed.s(full_release_only=True)) task_group() import_release_notes.delay() @@ -84,9 +84,13 @@ def import_release_notes(new_versions_only=True): release notes in the repository.""" versions = [Version.objects.most_recent()] if not new_versions_only: - versions = Version.objects.exclude(name__in=["master", "develop"]).active() + versions = ( + Version.objects.exclude(name__in=["master", "develop"]) + .active() + .order_by("name") + ) - logger.info(f"import_release_notes {[(v.pk,v.name) for v in versions]}") + logger.info(f"import_release_notes {[v.name for v in versions]}") for version in versions: logger.info(f"retrieving release notes for {version.name=} {version.pk=}") store_release_notes_task(version.pk) @@ -204,7 +208,7 @@ def import_most_recent_beta_release(token=None, delete_old=False): logger.info(f"calling import_version with {name=} {tag=}") import_version(name, tag, token=token, beta=True, full_release=False) logger.info(f"completed import_version with {name=} {tag=}") - mark_fully_completed() + mark_fully_completed(beta_only=True) # new_versions_only='False' otherwise will only be full releases import_release_notes(new_versions_only=False) return @@ -484,10 +488,18 @@ def purge_fastly_release_cache(): @app.task -def mark_fully_completed(): +def mark_fully_completed(beta_only=False, full_release_only=False): """Marks all versions as fully imported""" - 
Version.objects.filter(fully_imported=False).update(fully_imported=True) - logger.info("Marked all versions as fully imported.") + qs = Version.objects.filter(fully_imported=False) + if full_release_only: + logger.info("Marking full releases as fully imported") + qs = qs.filter(full_release=True) + elif beta_only: + logger.info("Marking beta as fully imported") + qs = qs.filter(beta=True) + versions = [v.name for v in qs.order_by("name").all()] + qs.update(fully_imported=True) + logger.info(f"Marked {versions=} as fully imported.") # Helper functions
+ class="p-1 text-sm border border-b-0 border-gray-400 text-center"> Download Now!
+ {{ download.display_name }}