|
11 | 11 | from django.urls import reverse |
12 | 12 | from django.utils import timezone |
13 | 13 |
|
14 | | -from dojo.models import Finding, Test, Test_Type, User |
| 14 | +from dojo.models import Engagement, Finding, Product, Product_Type, Test, Test_Type, User |
15 | 15 |
|
16 | 16 | from .dojo_test_case import DojoAPITestCase, get_unit_tests_scans_path |
17 | 17 | from .test_utils import assertTestImportModelsCreated |
@@ -2033,6 +2033,118 @@ def test_reimport_set_scan_date_parser_sets_date(self): |
2033 | 2033 | date = findings["results"][0]["date"] |
2034 | 2034 | self.assertEqual(date, "2006-12-26") |
2035 | 2035 |
|
@override_settings(
    IMPORT_REIMPORT_DEDUPE_BATCH_SIZE=50,
    IMPORT_REIMPORT_MATCH_BATCH_SIZE=50,
)
def test_reimport_with_internal_duplicates_same_batch(self):
    """
    Reimporting a report whose findings duplicate each other must create one finding.

    This mirrors the Selenium integration test test_import_path_tests in
    tests/dedupe_test.py, but drives the reimport through the API helper.

    BACKGROUND:
    Reimport processes findings in batches and fetches the candidate matches
    for a batch once, before any finding of that batch is saved. Findings that
    duplicate each other inside the same report, and land in the same batch,
    therefore cannot see each other as candidates:
        - Finding #1: no match found -> new finding is created
        - Finding #2: no match found (finding #1 not saved yet) -> created again
        - Finding #3: no match found (findings #1 and #2 not saved yet) -> created again
    Result: 3 findings instead of 1 unique finding.

    THIS TEST:
        1. Creates an empty test, so there are no pre-existing findings to match.
        2. Reimports dedupe_path_1.json, which contains 3 findings sharing
           file_path "/opt/appsecpipeline/source/dojo/cred/views.py",
           line_number 524 and test_id "B110" (so they should share a hash_code).
        3. Pins both batch sizes to 50; any size >= 3 keeps all three findings
           in a single batch.
        4. Verifies that exactly one finding exists afterwards.

    EXPECTED BEHAVIOR:
        - Total findings: 1 (the 3 duplicates collapse into 1 unique finding)
        - Active, non-duplicate findings: 1
        - Findings flagged duplicate: 0 (matched findings are not stored as duplicates)

    BEHAVIOR WITHOUT INTRA-BATCH TRACKING:
        - Total findings: 3
        - Active, non-duplicate findings: 1 (the first one)
        - Findings flagged duplicate: 2 (second and third marked as duplicates of the first)
    Note that the active count is 1 in both cases; the total and duplicate
    counts are what distinguish the fixed behavior from the buggy one.
    """
    # The fixture is shared with the integration tests and lives in
    # tests/dedupe_scans rather than unittests/scans.
    dedupe_path_file = Path(__file__).parent.parent / "tests" / "dedupe_scans" / "dedupe_path_1.json"
    # Fail with a readable message instead of an error raised deep inside the import helper.
    self.assertTrue(dedupe_path_file.is_file(), f"Missing scan fixture: {dedupe_path_file}")

    # One timestamp for every start/end field keeps the fixture data consistent.
    now = timezone.now()

    # Dedicated product hierarchy so nothing loaded from fixtures can act as a match candidate.
    product_type, _ = Product_Type.objects.get_or_create(name="PT Bandit Internal Dupes")
    product, _ = Product.objects.get_or_create(name="P Bandit Internal Dupes", prod_type=product_type)
    engagement = Engagement.objects.create(
        name="E Bandit Internal Dupes",
        product=product,
        target_start=now,
        target_end=now,
        deduplication_on_engagement=True,
    )

    # Empty test to reimport into: every finding in the report is new to it.
    test_type, _ = Test_Type.objects.get_or_create(name="Bandit Scan")
    test = Test.objects.create(
        engagement=engagement,
        test_type=test_type,
        title="Path Test 1",
        target_start=now,
        target_end=now,
    )

    # Reimport the report; its three findings can only match each other.
    reimport_result = self.reimport_scan_with_params(
        test.id,
        dedupe_path_file,
        scan_type="Bandit Scan",
    )

    # Count against the test the reimport reports having written to.
    test_id = reimport_result["test"]
    findings = Finding.objects.filter(test_id=test_id)
    total_findings = findings.count()
    active_findings = findings.filter(active=True, duplicate=False).count()
    duplicate_findings = findings.filter(duplicate=True).count()

    # subTest lets all three metrics report independently in a single run.
    with self.subTest(metric="total_findings"):
        self.assertEqual(total_findings, 1,
                         f"Expected 1 total finding (3 duplicates should match to 1), got {total_findings}. "
                         f"If this is 3, intra-batch duplicates weren't detected.")

    with self.subTest(metric="active_findings"):
        self.assertEqual(active_findings, 1,
                         f"Expected 1 active, non-duplicate finding, got {active_findings}.")

    with self.subTest(metric="duplicate_findings"):
        self.assertEqual(duplicate_findings, 0,
                         f"Expected 0 duplicate findings (duplicates within report should match), got {duplicate_findings}")
2036 | 2148 |
|
2037 | 2149 | class ImportReimportTestUI(DojoAPITestCase, ImportReimportMixin): |
2038 | 2150 | fixtures = ["dojo_testdata.json"] |
|
0 commit comments