Skip to content

Commit a9aa9cb

Browse files
stsewd and Copilot authored
Search: allow deleting search index for a project (#12584)
This allows us to mark a project so it's never indexed for search. Currently, we will mark these projects manually and create the notification so users are aware of this change and can re-enable search indexing in case we made a mistake without contacting support. ref readthedocs/meta#197 (comment) --------- Co-authored-by: Copilot <[email protected]>
1 parent 24c050c commit a9aa9cb

File tree

9 files changed

+226
-4
lines changed

9 files changed

+226
-4
lines changed

readthedocs/builds/querysets.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,15 @@ def for_reindex(self):
131131
- It's active and has been built at least once successfully.
132132
Since that means that it has files to be indexed.
133133
- Its project is not delisted or marked as spam.
134+
- Its project has search indexing enabled.
134135
"""
135136
return (
136137
self.filter(
137138
active=True,
138139
built=True,
139140
builds__state=BUILD_STATE_FINISHED,
140141
builds__success=True,
142+
project__search_indexing_enabled=True,
141143
)
142144
.exclude(project__delisted=True)
143145
.exclude(project__is_spam=True)

readthedocs/projects/forms.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from readthedocs.core.utils.extend import SettingsOverrideObject
2626
from readthedocs.integrations.models import Integration
2727
from readthedocs.invitations.models import Invitation
28+
from readthedocs.notifications.models import Notification
2829
from readthedocs.oauth.models import RemoteRepository
2930
from readthedocs.organizations.models import Team
3031
from readthedocs.projects.constants import ADDONS_FLYOUT_SORTING_CUSTOM_PATTERN
@@ -36,6 +37,8 @@
3637
from readthedocs.projects.models import Project
3738
from readthedocs.projects.models import ProjectRelationship
3839
from readthedocs.projects.models import WebHook
40+
from readthedocs.projects.notifications import MESSAGE_PROJECT_SEARCH_INDEXING_DISABLED
41+
from readthedocs.projects.tasks.search import index_project
3942
from readthedocs.projects.templatetags.projects_tags import sort_version_aware
4043
from readthedocs.redirects.models import Redirect
4144

@@ -431,6 +434,7 @@ class Meta:
431434
"versioning_scheme",
432435
"default_branch",
433436
"readthedocs_yaml_path",
437+
"search_indexing_enabled",
434438
# Meta data
435439
"programming_language",
436440
"project_url",
@@ -452,6 +456,8 @@ class Meta:
452456
def __init__(self, *args, **kwargs):
453457
super().__init__(*args, **kwargs)
454458

459+
self.had_search_disabled = not self.instance.search_indexing_enabled
460+
455461
# Remove empty choice from options.
456462
self.fields["versioning_scheme"].choices = [
457463
(key, value) for key, value in self.fields["versioning_scheme"].choices if key
@@ -467,6 +473,11 @@ def __init__(self, *args, **kwargs):
467473
)
468474
self.fields["versioning_scheme"].disabled = True
469475

476+
# Only show this field if search is disabled for the project.
477+
# We allow enabling it from the form, but not disabling it.
478+
if self.instance.search_indexing_enabled:
479+
self.fields.pop("search_indexing_enabled")
480+
470481
# NOTE: we are deprecating this feature.
471482
# However, we will keep it available for projects that are already using it.
472483
# Old projects not using it already or new projects won't be able to enable.
@@ -565,6 +576,17 @@ def clean_tags(self):
565576
)
566577
return tags
567578

579+
def save(self, commit=True):
580+
instance = super().save(commit)
581+
# Trigger a reindex when enabling search from the form.
582+
if self.had_search_disabled and instance.search_indexing_enabled:
583+
index_project.delay(project_slug=instance.slug)
584+
Notification.objects.cancel(
585+
message_id=MESSAGE_PROJECT_SEARCH_INDEXING_DISABLED,
586+
attached_to=instance,
587+
)
588+
return instance
589+
568590

569591
class ProjectRelationshipForm(forms.ModelForm):
570592
"""Form to add/update project relationships."""
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Generated by Django 5.2.7 on 2025-11-19 19:47
2+
3+
from django.db import migrations
4+
from django.db import models
5+
from django_safemigrate import Safe
6+
7+
8+
class Migration(migrations.Migration):
9+
safe = Safe.before_deploy()
10+
11+
dependencies = [
12+
("projects", "0155_custom_git_checkout_step"),
13+
]
14+
15+
operations = [
16+
migrations.AddField(
17+
model_name="historicalproject",
18+
name="search_indexing_enabled",
19+
field=models.BooleanField(
20+
db_default=True,
21+
default=True,
22+
help_text="Enable/disable search indexing for this project",
23+
verbose_name="Enable search indexing",
24+
),
25+
),
26+
migrations.AddField(
27+
model_name="project",
28+
name="search_indexing_enabled",
29+
field=models.BooleanField(
30+
db_default=True,
31+
default=True,
32+
help_text="Enable/disable search indexing for this project",
33+
verbose_name="Enable search indexing",
34+
),
35+
),
36+
]

readthedocs/projects/models.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,13 @@ class Project(models.Model):
538538
),
539539
)
540540

541+
search_indexing_enabled = models.BooleanField(
542+
_("Enable search indexing"),
543+
default=True,
544+
db_default=True,
545+
help_text=_("Enable/disable search indexing for this project"),
546+
)
547+
541548
privacy_level = models.CharField(
542549
_("Privacy Level"),
543550
max_length=20,

readthedocs/projects/notifications.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
MESSAGE_PROJECT_ADDONS_BY_DEFAULT = "project:addons:by-default"
2020
MESSAGE_PROJECT_SSH_KEY_WITH_WRITE_ACCESS = "project:ssh-key-with-write-access"
2121
MESSAGE_PROJECT_DEPRECATED_WEBHOOK = "project:webhooks:deprecated"
22+
MESSAGE_PROJECT_SEARCH_INDEXING_DISABLED = "project:search:indexing-disabled"
2223

2324
messages = [
2425
Message(
@@ -208,5 +209,19 @@
208209
),
209210
type=INFO,
210211
),
212+
# NOTE: Disabling search for projects and notifying users is done manually for now.
213+
Message(
214+
id=MESSAGE_PROJECT_SEARCH_INDEXING_DISABLED,
215+
header=_("Search indexing has been disabled for this project"),
216+
body=_(
217+
textwrap.dedent(
218+
"""
219+
Your project hasn't received any searches recently, to optimize resources, we've disabled search indexing for this project.
220+
If you want to re-enable search indexing, check the "Enable search indexing" option from <a href="{% url 'projects_edit' instance.slug %}">the project settings</a>.
221+
"""
222+
).strip(),
223+
),
224+
type=INFO,
225+
),
211226
]
212227
registry.add(messages)

readthedocs/projects/tasks/search.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,16 @@ def _get_indexers(*, version: Version, build: Build, search_index_name=None):
161161
# This is because saving the objects in the DB will give them an id,
162162
# and we need this id to be `None` when indexing the objects in ES.
163163
# ES will generate a unique id for each document.
164-
# NOTE: If the version is external, we don't create a search index for it.
165-
if not version.is_external:
164+
# NOTE: We don't create a search indexer for:
165+
# - External versions
166+
# - Versions from projects with search indexing disabled
167+
# - Versions from delisted projects
168+
skip_search_indexing = (
169+
not version.project.search_indexing_enabled
170+
or version.is_external
171+
or version.project.delisted
172+
)
173+
if not skip_search_indexing:
166174
search_indexer = SearchIndexer(
167175
project=version.project,
168176
version=version,
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from django.test import TestCase
2+
from django_dynamic_fixture import get
3+
4+
from readthedocs.builds.constants import BUILD_STATE_FINISHED, EXTERNAL
5+
from readthedocs.builds.models import Build, Version
6+
from readthedocs.projects.models import Project
7+
from readthedocs.projects.tasks.search import SearchIndexer, _get_indexers
8+
9+
10+
class TestSearchIndexing(TestCase):
11+
"""Tests for search_indexing_enabled field behavior."""
12+
13+
def test_search_indexer_not_created_when_disabled(self):
14+
project = get(Project, search_indexing_enabled=False)
15+
version = project.versions.first()
16+
build = get(Build, version=version, state=BUILD_STATE_FINISHED, success=True)
17+
18+
indexers = _get_indexers(version=version, build=build)
19+
20+
# Check that no SearchIndexer is in the list
21+
search_indexers = [
22+
indexer for indexer in indexers if isinstance(indexer, SearchIndexer)
23+
]
24+
assert len(search_indexers) == 0
25+
26+
def test_search_indexer_created_when_enabled(self):
27+
"""Test that SearchIndexer is created when search_indexing_enabled is True."""
28+
project = get(
29+
Project,
30+
search_indexing_enabled=True,
31+
)
32+
version = project.versions.first()
33+
build = get(Build, version=version, state=BUILD_STATE_FINISHED, success=True)
34+
35+
indexers = _get_indexers(version=version, build=build)
36+
37+
# Check that SearchIndexer is in the list
38+
search_indexers = [
39+
indexer for indexer in indexers if isinstance(indexer, SearchIndexer)
40+
]
41+
assert len(search_indexers) == 1
42+
43+
def test_search_indexer_not_created_for_delisted_project(self):
44+
"""Test that SearchIndexer is not created for delisted projects."""
45+
project = get(
46+
Project,
47+
delisted=True,
48+
)
49+
version = project.versions.first()
50+
build = get(Build, version=version, state=BUILD_STATE_FINISHED, success=True)
51+
52+
indexers = _get_indexers(version=version, build=build)
53+
54+
# Check that no SearchIndexer is in the list
55+
search_indexers = [
56+
indexer for indexer in indexers if isinstance(indexer, SearchIndexer)
57+
]
58+
assert len(search_indexers) == 0
59+
60+
def test_search_indexer_not_created_for_external_version(self):
61+
"""Test that SearchIndexer is not created for external versions."""
62+
project = get(Project)
63+
version = get(Version, project=project, slug="123", built=True, type=EXTERNAL)
64+
build = get(Build, version=version, state=BUILD_STATE_FINISHED, success=True)
65+
66+
indexers = _get_indexers(version=version, build=build)
67+
68+
# Check that no SearchIndexer is in the list
69+
search_indexers = [
70+
indexer for indexer in indexers if isinstance(indexer, SearchIndexer)
71+
]
72+
assert len(search_indexers) == 0

readthedocs/projects/tests/test_views.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from allauth.account.models import EmailAddress
22
from django.contrib.auth.models import User
3+
from unittest import mock
34
from django.contrib.messages import get_messages
45
from django.test import TestCase, override_settings
56
from django.urls import reverse
@@ -317,3 +318,55 @@ def test_download_files(self):
317318
resp["X-Accel-Redirect"],
318319
f"/proxito/media/{type_}/project/latest/project.{extension}",
319320
)
321+
322+
323+
class TestProjectEditView(TestCase):
324+
def setUp(self):
325+
self.user = get(User)
326+
self.project = get(Project, slug="project", users=[self.user], repo="https://github.com/user/repo")
327+
self.url = reverse("projects_edit", args=[self.project.slug])
328+
self.client.force_login(self.user)
329+
330+
@mock.patch("readthedocs.projects.forms.trigger_build")
331+
@mock.patch("readthedocs.projects.forms.index_project")
332+
def test_search_indexing_enabled(self, index_project, trigger_build):
333+
resp = self.client.get(self.url)
334+
assert resp.status_code == 200
335+
form = resp.context["form"]
336+
assert "search_indexing_enabled" not in form.fields
337+
338+
self.project.search_indexing_enabled = False
339+
self.project.save()
340+
341+
resp = self.client.get(self.url)
342+
assert resp.status_code == 200
343+
form = resp.context["form"]
344+
assert "search_indexing_enabled" in form.fields
345+
346+
data = {
347+
"name": self.project.name,
348+
"repo": self.project.repo,
349+
"language": self.project.language,
350+
"default_version": self.project.default_version,
351+
"versioning_scheme": self.project.versioning_scheme,
352+
}
353+
354+
data["search_indexing_enabled"] = False
355+
resp = self.client.post(
356+
self.url,
357+
data=data,
358+
)
359+
assert resp.status_code == 302
360+
self.project.refresh_from_db()
361+
assert not self.project.search_indexing_enabled
362+
index_project.delay.assert_not_called()
363+
364+
data["search_indexing_enabled"] = True
365+
resp = self.client.post(
366+
self.url,
367+
data=data,
368+
)
369+
assert resp.status_code == 302
370+
self.project.refresh_from_db()
371+
assert self.project.search_indexing_enabled
372+
index_project.delay.assert_called_once_with(project_slug=self.project.slug)

readthedocs/search/documents.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,13 @@ def get_queryset(self):
5959
but it's not a priority to find a solution for this as long as "delisted" projects are
6060
understood to be projects with a negative reason for being delisted.
6161
"""
62-
return super().get_queryset().exclude(delisted=True).exclude(is_spam=True)
62+
return (
63+
super()
64+
.get_queryset()
65+
.filter(search_indexing_enabled=True)
66+
.exclude(delisted=True)
67+
.exclude(is_spam=True)
68+
)
6369

6470
class Django:
6571
model = Project
@@ -122,7 +128,8 @@ def get_queryset(self):
122128
"""Don't include ignored files and delisted projects."""
123129
queryset = super().get_queryset()
124130
queryset = (
125-
queryset.exclude(ignore=True)
131+
queryset.filter(project__search_indexing_enabled=True)
132+
.exclude(ignore=True)
126133
.exclude(project__delisted=True)
127134
.exclude(project__is_spam=True)
128135
.select_related("version", "project")

0 commit comments

Comments
 (0)