65 changes: 59 additions & 6 deletions libraries/admin.py
@@ -1,4 +1,5 @@
import structlog
from datetime import date
from django.conf import settings
from django.contrib import admin, messages
from django.core.exceptions import ValidationError
@@ -12,9 +13,11 @@
from django.shortcuts import redirect
from django.views.generic import TemplateView
from django import forms
from celery import chain, group

from core.admin_filters import StaffUserCreatedByFilter
from libraries.forms import CreateReportForm, CreateReportFullForm
from reports.generation import determine_versions
from versions.models import Version
from versions.tasks import import_all_library_versions
from .filters import ReportConfigurationFilter
@@ -31,15 +34,22 @@
WordcloudMergeWord,
)
from .tasks import (
count_mailinglist_contributors,
count_commit_contributors_totals,
generate_library_report,
generate_mailinglist_cloud,
generate_release_report_with_stats,
generate_search_cloud,
get_mailing_list_stats,
get_new_contributors_count,
get_new_subscribers_stats,
synchronize_commit_author_user_data,
update_authors_and_maintainers,
update_commit_author_github_data,
update_commits,
update_issues,
update_libraries,
update_library_version_documentation_urls_all_versions,
generate_release_report,
synchronize_commit_author_user_data,
)
from .utils import generate_release_report_filename

@@ -189,11 +199,52 @@ def get_context_data(self, **kwargs):

def generate_report(self):
uri = f"{settings.ACCOUNT_DEFAULT_HTTP_PROTOCOL}://{self.request.get_host()}"
generate_release_report.delay(
user_id=self.request.user.id,
params=self.request.GET,
base_uri=uri,
logger.info("Queuing release report workflow")

# Get the report configuration to determine version parameters
form = self.get_form()
if not form.is_valid():
return

report_configuration = form.cleaned_data["report_configuration"]

# NOTE TO FUTURE DEVS: remember to account for the fact that a report
# configuration may not match with a real version in frequent cases where
# reports are generated before the release version has been created.
(report_before_release, prior_version, version) = determine_versions(
report_configuration.version
)
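        # report_before_release is True when the configured version has no
        # live Version row yet, i.e. the report is generated ahead of the
        # release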

# trigger stats tasks first to run in parallel using group, then chain the final
# report generation task
stats_tasks = group(
[
count_mailinglist_contributors.s(prior_version.pk, version.pk),
get_mailing_list_stats.s(prior_version.pk, version.pk),
count_commit_contributors_totals.s(version.pk, prior_version.pk),
get_new_subscribers_stats.s(
prior_version.release_date, version.release_date or date.today()
),
generate_mailinglist_cloud.s(prior_version.pk, version.pk),
# if the report is generated ahead of the release, fall back to the
# prior (live) version's search stats; otherwise use the release version
generate_search_cloud.s(
prior_version.pk if report_before_release else version.pk
),
get_new_contributors_count.s(version.pk),
]
)

# chain stats collection with final report generation
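        # (Celery promotes a chain that starts with a group into a chord,
        # so the group's results arrive at the callback as a single list,
        # ordered the same as the tasks above)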
workflow = chain(
stats_tasks,
generate_release_report_with_stats.s(
self.request.user.id,
self.request.GET,
uri,
),
)
workflow.apply_async()

def locked_publish_check(self):
form = self.get_form()
@@ -245,6 +296,8 @@ class LibraryReportView(ReleaseReportView):
report_type = "library report"

def generate_report(self):
# For library reports, we don't need a complex stats workflow since
# CreateReportFullForm doesn't use the same async stats pattern
generate_library_report.delay(self.request.GET)


114 changes: 114 additions & 0 deletions libraries/forms.py
@@ -411,6 +411,120 @@ def get_stats(self):
"slack": slack_stats,
}

def generate_context(
self, report_configuration: ReportConfiguration, stats_results: dict
):
committee_members = report_configuration.financial_committee_members.all()

# NOTE TO FUTURE DEVS: remember to account for the fact that a report
# configuration may not match with a real version in frequent cases where
# reports are generated before the release version has been created.
(report_before_release, prior_version, version) = determine_versions(
report_configuration.version
)

# Unpack stats_results in the same order as tasks were defined in the workflow
(
(mailinglist_contributor_release_count, mailinglist_contributor_new_count),
(mailinglist_post_stats, total_mailinglist_count),
(commit_contributors_release_count, commit_contributors_new_count),
mailinglist_new_subscribers_stats,
(
mailinglist_words,
mailinglist_wordcloud_base64,
mailinglist_wordcloud_top_words,
),
(search_wordcloud_base64, search_wordcloud_top_words, search_stats),
global_contributors_new_count,
) = stats_results

# Compute the synchronous stats that don't require async tasks
commit_count, version_commit_count = get_commit_counts(version)
top_libraries_for_version = get_top_libraries_for_version(version)
top_libraries_by_name = get_libraries_by_name(version)
library_order = self._get_library_order(top_libraries_by_name)
# TODO: we may eventually need a way to show removed libraries; for now
# it's not needed. When we do, the distinction between running this on a
# ReportConfiguration with a real 'version' entry vs. one that uses
# 'master' will need to be considered
libraries = get_libraries(library_order)
new_libraries = libraries.exclude(
library_version__version__release_date__lte=prior_version.release_date
).prefetch_related("authors")
top_contributors = get_top_contributors_for_version(version)
mailinglist_counts = get_mailinglist_counts(version)
lines_added, lines_removed = lines_changes_count(version)
opened_issues_count, closed_issues_count = get_issues_counts(
prior_version, version
)
# TODO: connected to above todo, add removed_libraries.count()
removed_library_count = 0

library_data = get_library_data(library_order, prior_version.pk, version.pk)
slack_stats = get_slack_stats(prior_version, version)

library_index_library_data = get_libraries_for_index(library_data, version)
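        # batch libraries two per page when the predicate holds, i.e. when a
        # library's contributor count fits within the per-page threshold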
batched_library_data = conditional_batched(
library_data,
2,
lambda x: x.get("top_contributors_release").count()
<= RELEASE_REPORT_AUTHORS_PER_PAGE_THRESHOLD,
)
git_graph_data = get_git_graph_data(prior_version, version)
download = get_download_links(version)

return {
"committee_members": committee_members,
"lines_added": lines_added,
"lines_removed": lines_removed,
"version": version,
"report_configuration": report_configuration,
"prior_version": prior_version,
"opened_issues_count": opened_issues_count,
"closed_issues_count": closed_issues_count,
"mailinglist_wordcloud_base64": mailinglist_wordcloud_base64,
"mailinglist_wordcloud_frequencies": mailinglist_wordcloud_top_words,
"mailinglist_counts": mailinglist_counts,
"mailinglist_total": total_mailinglist_count or 0,
"mailinglist_contributor_release_count": mailinglist_contributor_release_count, # noqa: E501
"mailinglist_contributor_new_count": mailinglist_contributor_new_count,
"mailinglist_post_stats": mailinglist_post_stats,
"mailinglist_new_subscribers_stats": mailinglist_new_subscribers_stats,
"mailinglist_charts_start_year": prior_version.release_date.year,
"search_wordcloud_base64": search_wordcloud_base64,
"search_wordcloud_frequencies": search_wordcloud_top_words,
"search_stats": search_stats,
"commit_contributors_release_count": commit_contributors_release_count,
"commit_contributors_new_count": commit_contributors_new_count,
"global_contributors_new_count": global_contributors_new_count,
"commit_count": commit_count,
"version_commit_count": version_commit_count,
"top_contributors_release_overall": top_contributors,
"library_data": library_data,
"new_libraries": new_libraries,
"batched_library_data": batched_library_data,
"top_libraries_for_version": top_libraries_for_version,
"library_count": libraries.count(),
"library_index_libraries": library_index_library_data,
"added_library_count": new_libraries.count(),
"removed_library_count": removed_library_count,
"downloads": download,
"contribution_box_graph": git_graph_data,
"slack_channels": get_slack_channels(),
"slack": slack_stats,
}

def render_with_stats(self, stats_results, base_uri=None):
"""Render HTML with pre-computed stats results"""
context = self.generate_context(
self.cleaned_data["report_configuration"], stats_results
)
if base_uri:
context["base_uri"] = base_uri
html = render_to_string(self.html_template_name, context)
self.cache_set(html)
return html


class CommitAuthorEmailForm(Form):
"""
2 changes: 1 addition & 1 deletion libraries/management/commands/release_tasks.py
@@ -59,7 +59,7 @@ def set_tasks(self):
Action("Updating slack activity buckets", ["fetch_slack_activity"]),
Action("Updating website statistics", self.update_website_statistics),
Action("Importing mailing list counts", self.import_ml_counts),
Action("Generating report", self.generate_report),
# Action("Generating report", self.generate_report),
]

def import_versions(self):
2 changes: 1 addition & 1 deletion libraries/management/commands/update_maintainers.py
@@ -29,7 +29,7 @@ def command(library_name, release):
library_versions = library_versions.filter(library__name__iexact=library_name)

if release is not None:
library_versions = library_versions.filter(version__name__icontains=release)
library_versions = library_versions.filter(version__name=release)

for library_version in library_versions.order_by(
"-version__name", "-library__name"
111 changes: 74 additions & 37 deletions libraries/tasks.py
@@ -19,7 +19,7 @@
CommitAuthor,
ReleaseReport,
)
from mailing_list.models import EmailData, PostingData, SubscriptionData
from mailing_list.models import EmailData, PostingData
from reports.generation import (
generate_algolia_words,
generate_wordcloud,
@@ -251,12 +251,35 @@ def update_issues(clean=False):


@app.task
def generate_release_report(user_id: int, params: dict, base_uri: str = None):
"""Generate a release report asynchronously and save it in RenderedContent."""
def generate_release_report_with_stats(stats_results, user_id, params, base_uri=None):
"""Wrapper task that reorders arguments for workflow mode."""
return generate_release_report(user_id, params, base_uri, stats_results)


@app.task
def generate_release_report(user_id, params, base_uri=None, stats_results=None):
"""Generate a release report asynchronously and save to RenderedContent/PDF

Args:
user_id: ID of the user creating the report
params: Form parameters for report configuration
base_uri: Base URI for the report (optional)
stats_results: Pre-collected stats from workflow (optional)
"""
logger.info(f"Starting generate_release_report {settings.LOCAL_DEVELOPMENT=}")

from libraries.forms import CreateReportForm

form = CreateReportForm(params)
html = form.cache_html(base_uri=base_uri)
if not form.is_valid():
logger.error(f"Form validation failed, {form.errors}")
return None

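    # workflow mode passes pre-collected stats from the chord; a direct
    # call computes them synchronously inside cache_html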
if stats_results:
html = form.render_with_stats(stats_results, base_uri=base_uri)
else:
html = form.cache_html(base_uri=base_uri)

# override the base uri to reference the internal container for local dev
if settings.LOCAL_DEVELOPMENT:
html = update_base_tag(html, DOCKER_CONTAINER_URL_WEB)
@@ -265,7 +288,9 @@ def generate_release_report(user_id: int, params: dict, base_uri: str = None):
created_by_id=user_id,
report_configuration_id=params.get("report_configuration"),
)
logger.info(f"Saving release_report {params.get('report_configuration')=}")
release_report.save()
logger.info(f"generate release report pdf {release_report.pk=}")
generate_release_report_pdf.delay(
release_report.pk, html=html, publish=params.get("publish")
)
@@ -330,7 +355,7 @@ def generate_library_report(params):
from libraries.forms import CreateReportFullForm

form = CreateReportFullForm(params)
form.cache_html()
return form.cache_html()


@app.task
@@ -588,40 +613,52 @@ def get_mailing_list_stats(prior_version_id: int, version_id: int):

@shared_task
def get_new_subscribers_stats(start_date: date, end_date: date):
data = (
SubscriptionData.objects.filter(
subscription_dt__gte=start_date,
subscription_dt__lte=end_date,
list="boost",
)
.annotate(
week=ExtractWeek("subscription_dt"),
iso_year=ExtractIsoYear("subscription_dt"),
)
.values("iso_year", "week")
.annotate(count=Count("id"))
.order_by("iso_year", "week")
)
"""Get new subscribers statistics for HyperKitty mailing list using raw SQL."""
import psycopg2
from django.conf import settings

# Convert data into a dict for easy lookup
counts_by_week = {(row["iso_year"], row["week"]): row["count"] for row in data}
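    # subscriber counts now come from HyperKitty's own database, so we
    # connect to it directly instead of going through the Django ORM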
conn = psycopg2.connect(settings.HYPERKITTY_DATABASE_URL)

# Iterate through every ISO week in the date range
current = start_date
seen = set()
chart_data = []
while current <= end_date:
iso_year, iso_week, _ = current.isocalendar()
key = (iso_year, iso_week)
if key not in seen: # skip duplicate weeks in the same loop
seen.add(key)
year_suffix = str(iso_year)[2:]
label = f"{iso_week} ({year_suffix})"
count = counts_by_week.get(key, 0)
chart_data.append({"x": label, "y": count})
current += timedelta(days=7) # hop by weeks

return chart_data
try:
with conn.cursor() as cursor:
cursor.execute(
"""
SELECT
EXTRACT(ISOYEAR FROM date_joined) as iso_year,
EXTRACT(WEEK FROM date_joined) as iso_week,
COUNT(*) as count
FROM auth_user
WHERE date_joined::date >= %s
AND date_joined::date <= %s
GROUP BY iso_year, iso_week
ORDER BY iso_year, iso_week
""",
[start_date, end_date],
)
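            # EXTRACT(ISOYEAR)/EXTRACT(WEEK) use ISO-8601 numbering, matching
            # the date.isocalendar() week labels generated below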

data = cursor.fetchall()

# Convert data into a dict for easy lookup
counts_by_week = {(int(row[0]), int(row[1])): row[2] for row in data}

# Iterate through every ISO week in the date range
current = start_date
seen = set()
chart_data = []
while current <= end_date:
iso_year, iso_week, _ = current.isocalendar()
key = (iso_year, iso_week)
if key not in seen: # skip duplicate weeks in the same loop
seen.add(key)
year_suffix = str(iso_year)[2:]
label = f"{iso_week} ({year_suffix})"
count = counts_by_week.get(key, 0)
chart_data.append({"x": label, "y": count})
current += timedelta(days=7) # hop by weeks

return chart_data
finally:
conn.close()


@shared_task