
Commit 6861148

Copilot and zkoppert committed
Implement PR comment statistics feature with HIDE_PR_STATISTICS configuration
Co-authored-by: zkoppert <[email protected]>
1 parent d266d0b commit 6861148

13 files changed: +354 -12 lines changed

README.md

Lines changed: 1 addition & 0 deletions
@@ -156,6 +156,7 @@ This action can be configured to authenticate with GitHub App Installation or Pe
 | `HIDE_TIME_TO_FIRST_RESPONSE` | False | False | If set to `true`, the time to first response will not be displayed in the generated Markdown file. |
 | `HIDE_STATUS` | False | True | If set to `true`, the status column will not be shown |
 | `HIDE_CREATED_AT` | False | True | If set to `true`, the creation timestamp will not be displayed in the generated Markdown file. |
+| `HIDE_PR_STATISTICS` | False | True | If set to `true`, PR comment statistics (mean, median, 90th percentile, and individual PR comment counts) will not be displayed in the generated Markdown file. |
 | `DRAFT_PR_TRACKING` | False | False | If set to `true`, draft PRs will be included in the metrics as a new column and in the summary stats. |
 | `IGNORE_USERS` | False | False | A comma separated list of users to ignore when calculating metrics. (ie. `IGNORE_USERS: 'user1,user2'`). To ignore bots, append `[bot]` to the user (ie. `IGNORE_USERS: 'github-actions[bot]'`) Users in this list will also have their authored issues and pull requests removed from the Markdown table. |
 | `ENABLE_MENTOR_COUNT` | False | False | If set to 'TRUE' count number of comments users left on discussions, issues and PRs and display number of active mentors |

classes.py

Lines changed: 3 additions & 0 deletions
@@ -25,6 +25,7 @@ class IssueWithMetrics:
         mentor_activity (dict, optional): A dictionary containing active mentors
         created_at (datetime, optional): The time the issue was created.
         status (str, optional): The status of the issue, e.g., "open", "closed as completed",
+        pr_comment_count (int, optional): The number of comments on the PR (excluding bots).
     """

     # pylint: disable=too-many-instance-attributes
@@ -44,6 +45,7 @@ def __init__(
         assignee=None,
         assignees=None,
         status=None,
+        pr_comment_count=None,
     ):
         self.title = title
         self.html_url = html_url
@@ -58,3 +60,4 @@ def __init__(
         self.mentor_activity = mentor_activity
         self.created_at = created_at
         self.status = status
+        self.pr_comment_count = pr_comment_count

config.py

Lines changed: 6 additions & 0 deletions
@@ -57,6 +57,7 @@ class EnvVars:
         rate_limit_bypass (bool): If set to TRUE, bypass the rate limit for the GitHub API
         draft_pr_tracking (bool): If set to TRUE, track PR time in draft state
             in addition to other metrics
+        hide_pr_statistics (bool): If set to TRUE, hide PR comment statistics in the output
     """

     def __init__(
@@ -88,6 +89,7 @@ def __init__(
         output_file: str,
         rate_limit_bypass: bool = False,
         draft_pr_tracking: bool = False,
+        hide_pr_statistics: bool = True,
     ):
         self.gh_app_id = gh_app_id
         self.gh_app_installation_id = gh_app_installation_id
@@ -116,6 +118,7 @@ def __init__(
         self.output_file = output_file
         self.rate_limit_bypass = rate_limit_bypass
         self.draft_pr_tracking = draft_pr_tracking
+        self.hide_pr_statistics = hide_pr_statistics

     def __repr__(self):
         return (
@@ -147,6 +150,7 @@ def __repr__(self):
             f"{self.output_file}"
             f"{self.rate_limit_bypass}"
             f"{self.draft_pr_tracking}"
+            f"{self.hide_pr_statistics}"
         )


@@ -244,6 +248,7 @@ def get_env_vars(test: bool = False) -> EnvVars:
     hide_time_to_first_response = get_bool_env_var("HIDE_TIME_TO_FIRST_RESPONSE", False)
     hide_created_at = get_bool_env_var("HIDE_CREATED_AT", True)
     hide_status = get_bool_env_var("HIDE_STATUS", True)
+    hide_pr_statistics = get_bool_env_var("HIDE_PR_STATISTICS", True)
     enable_mentor_count = get_bool_env_var("ENABLE_MENTOR_COUNT", False)
     min_mentor_comments = os.getenv("MIN_MENTOR_COMMENTS", "10")
     max_comments_eval = os.getenv("MAX_COMMENTS_EVAL", "20")
@@ -278,4 +283,5 @@ def get_env_vars(test: bool = False) -> EnvVars:
         output_file,
         rate_limit_bypass,
         draft_pr_tracking,
+        hide_pr_statistics,
     )
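
For orientation, a minimal sketch of how a boolean flag like `HIDE_PR_STATISTICS` can be parsed. The helper name `read_bool_flag` is hypothetical (the repository's own `get_bool_env_var` is not shown in this commit and may behave differently); the point is only that an unset variable falls back to the default of `True`, so the PR comment statistics are opt-in.

```python
import os


# Hypothetical stand-in for the repository's get_bool_env_var helper:
# an unset variable falls back to the default, so HIDE_PR_STATISTICS
# defaults to True and the new statistics are opt-in.
def read_bool_flag(name: str, default: bool) -> bool:
    value = os.environ.get(name, "").strip().lower()
    return default if value == "" else value == "true"


print(read_bool_flag("HIDE_PR_STATISTICS", True))  # True when the variable is unset
```

Setting `HIDE_PR_STATISTICS: 'false'` in the workflow flips the flag, which is what enables the new column and summary row in the writers below.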

issue_metrics.py

Lines changed: 12 additions & 0 deletions
@@ -30,6 +30,7 @@
 from markdown_helpers import markdown_too_large_for_issue_body, split_markdown_file
 from markdown_writer import write_to_markdown
 from most_active_mentors import count_comments_per_user, get_mentor_count
+from pr_comments import count_pr_comments, get_stats_pr_comments
 from search import get_owners_and_repositories, search_issues
 from time_in_draft import get_stats_time_in_draft, measure_time_in_draft
 from time_to_answer import get_stats_time_to_answer, measure_time_to_answer
@@ -153,6 +154,12 @@ def get_per_issue_metrics(
                 f"An error occurred processing review comments. Perhaps the review contains a ghost user. {e}"
             )

+        # Count PR comments if this is a pull request and statistics are not hidden
+        if pull_request and not env_vars.hide_pr_statistics:
+            issue_with_metrics.pr_comment_count = count_pr_comments(
+                issue, pull_request, ignore_users
+            )
+
         if env_vars.hide_time_to_first_response is False:
             issue_with_metrics.time_to_first_response = (
                 measure_time_to_first_response(
@@ -302,6 +309,7 @@ def main(): # pragma: no cover
             average_time_to_answer=None,
             average_time_in_draft=None,
             average_time_in_labels=None,
+            stats_pr_comments=None,
             num_issues_opened=None,
             num_issues_closed=None,
             num_mentor_count=None,
@@ -329,6 +337,7 @@ def main(): # pragma: no cover
             average_time_to_answer=None,
             average_time_in_draft=None,
             average_time_in_labels=None,
+            stats_pr_comments=None,
             num_issues_opened=None,
             num_issues_closed=None,
             num_mentor_count=None,
@@ -362,6 +371,7 @@ def main(): # pragma: no cover

     stats_time_to_answer = get_stats_time_to_answer(issues_with_metrics)
     stats_time_in_draft = get_stats_time_in_draft(issues_with_metrics)
+    stats_pr_comments = get_stats_pr_comments(issues_with_metrics)

     num_mentor_count = 0
     if enable_mentor_count:
@@ -379,6 +389,7 @@ def main(): # pragma: no cover
         stats_time_to_answer=stats_time_to_answer,
         stats_time_in_draft=stats_time_in_draft,
         stats_time_in_labels=stats_time_in_labels,
+        stats_pr_comments=stats_pr_comments,
         num_issues_opened=num_issues_open,
         num_issues_closed=num_issues_closed,
         num_mentor_count=num_mentor_count,
@@ -393,6 +404,7 @@ def main(): # pragma: no cover
         average_time_to_answer=stats_time_to_answer,
         average_time_in_draft=stats_time_in_draft,
         average_time_in_labels=stats_time_in_labels,
+        stats_pr_comments=stats_pr_comments,
         num_issues_opened=num_issues_open,
         num_issues_closed=num_issues_closed,
         num_mentor_count=num_mentor_count,
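
A quick sketch of what the default configuration means end to end. The `IssueWithMetrics` call below assumes `title`, `html_url`, and `author` are its first three positional parameters (not shown in this commit), and the URL is a placeholder. With `HIDE_PR_STATISTICS` left at `true`, `count_pr_comments` never runs, every `pr_comment_count` stays `None`, and the aggregation returns `None`, so both writers skip the statistics.

```python
from classes import IssueWithMetrics
from pr_comments import get_stats_pr_comments

# No counting happened (the default), so the comment count is still None
# and the aggregate stats come back as None.
pr = IssueWithMetrics("Add caching", "https://github.com/owner/repo/pull/7", "octocat")
print(pr.pr_comment_count)          # None
print(get_stats_pr_comments([pr]))  # None -> writers receive stats_pr_comments=None
```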

json_writer.py

Lines changed: 14 additions & 0 deletions
@@ -33,6 +33,7 @@ def write_to_json(
     stats_time_to_answer: Union[dict[str, timedelta], None],
     stats_time_in_draft: Union[dict[str, timedelta], None],
     stats_time_in_labels: Union[dict[str, dict[str, timedelta]], None],
+    stats_pr_comments: Union[dict[str, float], None],
     num_issues_opened: Union[int, None],
     num_issues_closed: Union[int, None],
     num_mentor_count: Union[int, None],
@@ -142,6 +143,15 @@ def write_to_json(
     for label, time in stats_time_in_labels["90p"].items():
         p90_time_in_labels[label] = str(time)

+    # PR comments statistics
+    average_pr_comments = None
+    med_pr_comments = None
+    p90_pr_comments = None
+    if stats_pr_comments is not None:
+        average_pr_comments = stats_pr_comments["avg"]
+        med_pr_comments = stats_pr_comments["med"]
+        p90_pr_comments = stats_pr_comments["90p"]
+
     # Create a dictionary with the metrics
     metrics: dict[str, Any] = {
         "average_time_to_first_response": str(average_time_to_first_response),
@@ -159,6 +169,9 @@ def write_to_json(
         "90_percentile_time_to_answer": str(p90_time_to_answer),
         "90_percentile_time_in_draft": str(p90_time_in_draft),
         "90_percentile_time_in_labels": p90_time_in_labels,
+        "average_pr_comments": average_pr_comments,
+        "median_pr_comments": med_pr_comments,
+        "90_percentile_pr_comments": p90_pr_comments,
         "num_items_opened": num_issues_opened,
         "num_items_closed": num_issues_closed,
         "num_mentor_count": num_mentor_count,
@@ -184,6 +197,7 @@ def write_to_json(
                 "time_to_answer": str(issue.time_to_answer),
                 "time_in_draft": str(issue.time_in_draft),
                 "label_metrics": formatted_label_metrics,
+                "pr_comment_count": issue.pr_comment_count,
                 "created_at": str(issue.created_at),
             }
         )
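
The fragment below is illustrative only: the numbers are invented and only the key names come from the diff above. In the real output the three aggregate keys sit in the top-level metrics object and `pr_comment_count` sits on each per-item entry; they are flattened into one dictionary here for brevity. Unlike the timedelta fields, which are stringified, these are written as raw numbers, so a missing per-item count would show up as `null` in the JSON.

```python
import json

# Illustrative fragment only -- invented numbers, flattened structure.
fragment = {
    "average_pr_comments": 3.4,
    "median_pr_comments": 2.0,
    "90_percentile_pr_comments": 8.1,
    "pr_comment_count": None,  # a plain issue, or statistics hidden
}
print(json.dumps(fragment, indent=2))
```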

markdown_writer.py

Lines changed: 34 additions & 9 deletions
@@ -91,6 +91,10 @@ def get_non_hidden_columns(labels) -> List[str]:
     if not hide_status:
         columns.append("Status")

+    hide_pr_statistics = env_vars.hide_pr_statistics
+    if not hide_pr_statistics:
+        columns.append("PR Comments")
+
     return columns


@@ -101,6 +105,7 @@ def write_to_markdown(
     average_time_to_answer: Union[dict[str, timedelta], None],
     average_time_in_draft: Union[dict[str, timedelta], None],
     average_time_in_labels: Union[dict, None],
+    stats_pr_comments: Union[dict[str, float], None],
     num_issues_opened: Union[int, None],
     num_issues_closed: Union[int, None],
     num_mentor_count: Union[int, None],
@@ -169,6 +174,7 @@ def write_to_markdown(
             average_time_to_answer,
             average_time_in_draft,
             average_time_in_labels,
+            stats_pr_comments,
             num_issues_opened,
             num_issues_closed,
             num_mentor_count,
@@ -238,6 +244,8 @@ def write_to_markdown(
                 file.write(f" {issue.created_at} |")
             if "Status" in columns:
                 file.write(f" {issue.status} |")
+            if "PR Comments" in columns:
+                file.write(f" {issue.pr_comment_count or 'N/A'} |")
             file.write("\n")
         file.write(
             "\n_This report was generated with the \
@@ -256,6 +264,7 @@ def write_overall_metrics_tables(
     stats_time_to_answer,
     average_time_in_draft,
     stats_time_in_labels,
+    stats_pr_comments,
     num_issues_opened,
     num_issues_closed,
     num_mentor_count,
@@ -267,15 +276,21 @@ def write_overall_metrics_tables(
     enable_mentor_count=False,
 ):
     """Write the overall metrics tables to the markdown file."""
-    if any(
-        column in columns
-        for column in [
-            "Time to first response",
-            "Time to close",
-            "Time to answer",
-            "Time in draft",
-        ]
-    ) or (hide_label_metrics is False and len(labels) > 0):
+    env_vars = get_env_vars()
+
+    if (
+        any(
+            column in columns
+            for column in [
+                "Time to first response",
+                "Time to close",
+                "Time to answer",
+                "Time in draft",
+            ]
+        )
+        or (hide_label_metrics is False and len(labels) > 0)
+        or (not env_vars.hide_pr_statistics and stats_pr_comments is not None)
+    ):
         file.write("| Metric | Average | Median | 90th percentile |\n")
         file.write("| --- | --- | --- | ---: |\n")
         if "Time to first response" in columns:
@@ -330,6 +345,16 @@ def write_overall_metrics_tables(
                     f"| {stats_time_in_labels['med'][label]} "
                     f"| {stats_time_in_labels['90p'][label]} |\n"
                 )
+
+        # Add PR comment statistics if not hidden
+        if not env_vars.hide_pr_statistics and stats_pr_comments is not None:
+            file.write(
+                f"| Number of comments per PR "
+                f"| {stats_pr_comments['avg']} "
+                f"| {stats_pr_comments['med']} "
+                f"| {stats_pr_comments['90p']} |\n"
+            )
+
     if "Status" in columns:  # Add logic for the 'status' column
         file.write("| Status | | | |\n")

pr_comments.py

Lines changed: 120 additions & 0 deletions
@@ -0,0 +1,120 @@
+"""A module for measuring the number of comments on pull requests.
+
+This module provides functions for counting comments on GitHub pull requests,
+excluding bot comments, and calculating statistics about comment counts.
+
+Functions:
+    count_pr_comments(
+        issue: Union[github3.issues.Issue, None],
+        pull_request: Union[github3.pulls.PullRequest, None],
+        ignore_users: Union[List[str], None] = None,
+    ) -> Union[int, None]:
+        Count the number of comments on a pull request, excluding bot comments.
+    get_stats_pr_comments(
+        issues_with_metrics: List[IssueWithMetrics],
+    ) -> Union[dict[str, float], None]:
+        Calculate stats describing the comment count for a list of pull requests.
+"""
+
+from typing import List, Union
+
+import github3
+import numpy
+from classes import IssueWithMetrics
+
+
+def count_pr_comments(
+    issue: Union[github3.issues.Issue, None],  # type: ignore
+    pull_request: Union[github3.pulls.PullRequest, None] = None,
+    ignore_users: Union[List[str], None] = None,
+) -> Union[int, None]:
+    """Count the number of comments on a pull request, excluding bot comments.
+
+    Args:
+        issue (Union[github3.issues.Issue, None]): A GitHub issue.
+        pull_request (Union[github3.pulls.PullRequest, None]): A GitHub pull request.
+        ignore_users (Union[List[str], None]): A list of GitHub usernames to ignore.
+
+    Returns:
+        Union[int, None]: The number of comments on the pull request, excluding bots.
+            Returns None if not a pull request.
+    """
+    if not pull_request or not issue:
+        return None
+
+    if ignore_users is None:
+        ignore_users = []
+
+    comment_count = 0
+
+    # Count issue comments
+    try:
+        comments = issue.issue.comments()  # type: ignore
+        for comment in comments:
+            # Skip bot comments and ignored users
+            if (
+                str(comment.user.type.lower()) != "bot"
+                and comment.user.login not in ignore_users
+            ):
+                comment_count += 1
+    except (AttributeError, TypeError):
+        # If we can't get comments, just continue
+        pass
+
+    # Count pull request review comments
+    try:
+        review_comments = pull_request.review_comments()
+        for comment in review_comments:
+            # Skip bot comments and ignored users
+            if (
+                str(comment.user.type.lower()) != "bot"
+                and comment.user.login not in ignore_users
+            ):
+                comment_count += 1
+    except (AttributeError, TypeError):
+        # If we can't get review comments, just continue
+        pass
+
+    return comment_count
+
+
+def get_stats_pr_comments(
+    issues_with_metrics: List[IssueWithMetrics],
+) -> Union[dict[str, float], None]:
+    """Calculate stats describing the comment count for a list of pull requests.
+
+    Args:
+        issues_with_metrics (List[IssueWithMetrics]): A list of GitHub issues with metrics attached.
+
+    Returns:
+        Union[Dict[str, float], None]: The stats describing comment counts for PRs.
+    """
+    # Filter out issues that are not pull requests or have no comment count
+    prs_with_comment_counts = [
+        issue.pr_comment_count
+        for issue in issues_with_metrics
+        if issue.pr_comment_count is not None
+    ]
+
+    if not prs_with_comment_counts:
+        return None
+
+    # Calculate statistics
+    average_comment_count = numpy.round(numpy.average(prs_with_comment_counts), 1)
+    median_comment_count = numpy.round(numpy.median(prs_with_comment_counts), 1)
+    ninety_percentile_comment_count = numpy.round(
+        numpy.percentile(prs_with_comment_counts, 90), 1
+    )
+
+    stats = {
+        "avg": average_comment_count,
+        "med": median_comment_count,
+        "90p": ninety_percentile_comment_count,
+    }
+
+    # Print the statistics
+    print(f"Average number of comments per PR: {average_comment_count}")
+    print(f"Median number of comments per PR: {median_comment_count}")
+    print(f"90th percentile of comments per PR: {ninety_percentile_comment_count}")
+
+    return stats
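
A minimal usage sketch of the aggregation path, assuming the `IssueWithMetrics` constructor takes `title`, `html_url`, and `author` as its first three positional parameters (not shown in this commit); the URLs and counts are invented.

```python
from classes import IssueWithMetrics
from pr_comments import get_stats_pr_comments

prs = [
    IssueWithMetrics("Fix flaky test", "https://github.com/owner/repo/pull/1", "octocat"),
    IssueWithMetrics("Add caching", "https://github.com/owner/repo/pull/2", "octocat"),
]
prs[0].pr_comment_count = 4   # normally filled in by count_pr_comments()
prs[1].pr_comment_count = 10

stats = get_stats_pr_comments(prs)  # also prints its own summary lines
print(stats["avg"], stats["med"], stats["90p"])  # 7.0 7.0 9.4
```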
