Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 63 additions & 1 deletion tests/perf/test_stats.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# from treeherder.model.models import Push
from treeherder.perf.stats import interpret_silverman_kde
from treeherder.perf.stats import (
interpret_cles,
interpret_silverman_kde,
plot_kde_with_isj_bandwidth,
)

# p-value threshold (significance cut-off) to use throughout this test module
PVALUE_THRESHOLD = 0.05
Expand All @@ -21,3 +25,61 @@ def test_interpret_silverman_kde():
) = interpret_silverman_kde(mock_base, mock_new, lower_is_better)
assert silverman_kde["bandwidth"] == "Silverman"
assert warning_msgs == []


def test_plot_kde_with_isj_bandwidth():
    """Exercise ``plot_kde_with_isj_bandwidth`` with one and three samples.

    With a single datapoint per revision the returned warnings must start
    with the "Less than 2 datapoints" message for Base, then for New.
    With three datapoints per revision each returned plot must report a
    ``sample_count`` of 3.
    """
    # Single-sample case: only the warnings are inspected.
    single_base = [2.74]
    single_new = [2.65]
    _, _, single_warnings = plot_kde_with_isj_bandwidth(single_base, single_new)

    assert single_warnings[0] == (
        "Less than 2 datapoints or no standard variance for a meaningful fit "
        "Kernel Density Estimator (KDE) with an ISJ bandwidth to Base."
    )
    assert single_warnings[1] == (
        "Less than 2 datapoints or no standard variance for a meaningful fit "
        "Kernel Density Estimator (KDE) with an ISJ bandwidth to New."
    )

    # Three-sample case: each plot must report how many samples it was given.
    triple_base = [2.74, 2.56, 2.88]
    triple_new = [2.65, 2.33, 2.25]
    base_plot, new_plot, _ = plot_kde_with_isj_bandwidth(triple_base, triple_new)

    assert new_plot["sample_count"] == 3
    assert base_plot["sample_count"] == 3


def test_interpret_cles():
    """Check the CLES value returned by ``interpret_cles`` for one-sample inputs.

    The call passes a Mann-Whitney statistic of 0.1 with a single datapoint
    per revision and expects 0.1 back, both as the second element of the
    returned tuple and under the ``"cles"`` key of the first element.
    """
    mock_base = [2.74]
    mock_new = [2.65]
    mock_mann_stat = 0.1
    mock_mann_pvalue = 0.2
    # These must be plain scalars. A stray trailing comma turns them into
    # one-element tuples, and ``(False,)`` is truthy, so the flag would be
    # read as "lower is better" by any truthiness check.
    interpretation = ""
    lower_is_better = False
    mock_delta = 0.2

    # Unpack all six values so a change in the return shape fails loudly.
    (
        cles_obj,
        cles,
        is_significant,
        cles_explanation,
        mann_whitney_u_cles,
        cliffs_delta_cles,
    ) = interpret_cles(
        mock_mann_stat,
        mock_mann_pvalue,
        mock_new,
        mock_base,
        mock_delta,
        interpretation,
        lower_is_better,
    )

    assert cles_obj["cles"] == 0.1
    assert cles == 0.1
150 changes: 149 additions & 1 deletion tests/webapp/api/test_perfcompare_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,6 +1138,154 @@ def test_perfcompare_results_with_mann_witney_u_against_no_base(
base_perf_data_values,
)

expected = [
{
"base_rev": None,
"new_rev": test_perfcomp_push_2.revision,
"framework_id": base_sig.framework.id,
"platform": base_sig.platform.platform,
"suite": base_sig.suite,
"header_name": response["header_name"],
"base_repository_name": base_sig.repository.name,
"new_repository_name": new_sig.repository.name,
"base_app": base_sig.application,
"new_app": new_sig.application,
"is_complete": response["is_complete"],
"base_measurement_unit": base_sig.measurement_unit,
"new_measurement_unit": new_sig.measurement_unit,
"base_retriggerable_job_ids": [4],
"new_retriggerable_job_ids": [10],
"base_runs": base_perf_data_values,
"new_runs": new_perf_data_values,
"base_runs_replicates": [],
"new_runs_replicates": [],
"test": base_sig.test,
"option_name": response["option_name"],
"extra_options": base_sig.extra_options,
"delta_value": round(response["delta_value"], 2),
"delta_percentage": round(response["delta_pct"], 2),
"lower_is_better": response["lower_is_better"],
"is_confident": response["is_confident"],
"more_runs_are_needed": False,
"mann_whitney_test": {
"interpretation": "not significant",
"pvalue": 1.0,
"stat": None,
"test_name": "Mann-Whitney U",
},
"graphs_link": f"https://treeherder.mozilla.org/perfherder/graphs?"
f"highlightedRevisions={test_perfcomp_push_2.revision}&"
f"series={try_repository.name}%2C{base_sig.signature_hash}%2C1%2C{base_sig.framework.id}&"
f"series={test_repository.name}%2C{base_sig.signature_hash}%2C1%2C{base_sig.framework.id}&"
f"timerange=86400",
"is_fit_good": True,
"is_improvement": None,
"is_regression": None,
"is_meaningful": None,
"is_new_better": False,
"base_parent_signature": response["base_parent_signature"],
"new_parent_signature": response["new_parent_signature"],
"base_signature_id": response["base_signature_id"],
"new_signature_id": response["new_signature_id"],
"has_subtests": response["has_subtests"],
"cles": None,
"cliffs_delta": -1.0,
"cliffs_interpretation": "large",
"direction_of_change": "worse",
"base_standard_stats": {
"count": 1,
"max": 32.4,
"mean": round(response["base_avg_value"], 2),
"median": round(response["base_median_value"], 2),
"min": 32.4,
"stddev": 0.0,
"stddev_pct": 0.0,
"variance": 0.0,
},
"new_standard_stats": {
"count": 1,
"max": 40.2,
"mean": round(response["new_avg_value"], 2),
"median": round(response["new_median_value"], 2),
"min": 40.2,
"stddev": 0.0,
"stddev_pct": 0.0,
"variance": 0.0,
},
"kde_base": {
"kde_x": [
32.4,
],
"kde_y": [],
"median": 32.4,
"sample_count": 1,
},
"kde_new": {
"kde_x": [
40.2,
],
"kde_y": [],
"median": 40.2,
"sample_count": 1,
},
"kde_warnings": [
"Less than 2 datapoints or no standard variance for a meaningful fit "
"Kernel Density Estimator (KDE) with an ISJ bandwidth to Base.",
"Less than 2 datapoints or no standard variance for a meaningful fit "
"Kernel Density Estimator (KDE) with an ISJ bandwidth to New.",
],
"ks_test": {
"interpretation": "KS test p-value: 1.000, good fit",
"pvalue": 1.0,
"stat": 1.0,
"test_name": "Kolmogorov-Smirnov",
},
"ks_warning": None,
"shapiro_wilk_test_base": {
"interpretation": "Not enough data for normality test.",
"pvalue": None,
"stat": None,
"test_name": "Shapiro-Wilk",
},
"shapiro_wilk_test_new": {
"interpretation": "Not enough data for normality test",
"pvalue": None,
"stat": None,
"test_name": "Shapiro-Wilk",
},
"shapiro_wilk_warnings": [
"Shapiro-Wilk test cannot be run on Base with fewer than 3 data points.",
"Shapiro-Wilk test cannot be run on New with fewer than 3 data points.",
],
"silverman_kde": {
"bandwidth": "Silverman",
"base_locations": [32.4],
"base_mode_count": 1,
"base_prominence": 0.08,
"is_improvement": None,
"is_regression": None,
"modes": [
{
"ci_high": None,
"ci_low": None,
"ci_warning": None,
"median_shift_summary": None,
"mode_end": "36.47",
"mode_name": "Mode 1",
"mode_start": "28.33",
"shift": None,
"shift_summary": None,
},
],
"new_locations": [40.2],
"new_mode_count": 1,
"new_prominence": 0.08,
},
"silverman_warnings": [],
"warning_c_delta": None,
},
]

query_params = (
"?base_repository={}&new_repository={}&new_revision={}&framework={"
"}&interval={}&no_subtests=true&test_version={}".format(
Expand All @@ -1151,7 +1299,7 @@ def test_perfcompare_results_with_mann_witney_u_against_no_base(
)

response = client.get(reverse("perfcompare-results") + query_params)

assert response.status_code == 200
assert expected[0] == response.json()[0]
assert response.json()[0]["base_parent_signature"] is None
assert response.json()[0]["new_parent_signature"] is None
Loading