Skip to content

Commit 785d9e7

Browse files
committed
Implement clustered tables' options in Meta: number_of_shards and clustered_by
1 parent 99c8fb4 commit 785d9e7

File tree

3 files changed

+117
-12
lines changed

3 files changed

+117
-12
lines changed

cratedb_django/models/model.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@
22
from django.db.models.base import ModelBase
33

44
# Sentinel meaning "option not set": if a meta option has the value OMITTED,
# it will be omitted from SQL creation. bool(OMITTED) resolves to False, so
# plain truthiness checks work, and repr(OMITTED) is "OMITTED" so the sentinel
# stays readable in error messages and debug output.
_OMITTED = type(
    "OMITTED",
    (),
    {
        "__bool__": lambda self: False,
        "__repr__": lambda self: "OMITTED",
    },
)
OMITTED = _OMITTED()

# dict of all the extra options a CrateModel Meta class has.
# (name, default_value)
CRATE_META_OPTIONS = {
    "auto_refresh": False,  # Automatically refresh a table on inserts.
    "partition_by": OMITTED,
    "clustered_by": OMITTED,
    "number_of_shards": OMITTED,
}
1417

1518

@@ -49,7 +52,7 @@ class CrateModel(models.Model, metaclass=MetaCrate):
4952
def save(self, *args, **kwargs):
5053
super().save(*args, **kwargs) # perform the actual save (insert or update)
5154
auto_refresh = getattr(self._meta, "auto_refresh", False)
52-
if auto_refresh and self.pk: # If self.pk is available, its an insert.
55+
if auto_refresh and self.pk: # If self.pk is available, it's an insert.
5356
table_name = self._meta.db_table
5457
with connection.cursor() as cursor:
5558
cursor.execute(f"refresh table {table_name}")

cratedb_django/schema.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@
77
from cratedb_django.models.model import OMITTED
88

99

10+
def check_field(model, field_name: str) -> None:
    """Ensure that ``field_name`` can be looked up on ``model``.

    The lookup is delegated to ``model._meta.get_field``; its return value is
    discarded, only success or failure matters.

    Raises:
        ValueError: if the lookup raises any exception. The original
            exception is chained as ``__cause__``.
    """
    try:
        model._meta.get_field(field_name)
    except Exception as exc:
        message = f"Column {field_name!r} does not exist in model"
        raise ValueError(message) from exc
15+
16+
1017
class DatabaseSchemaEditor(BaseDatabaseSchemaEditor):
1118
# TODO DOCUMENT CAVEAT: IF YOU START WITH A DJANGO MIGRATIONS CREATED BY OTHER DATABASE LIKE POSTGRES,
1219
# NEW MIGRATIONS WITH NO-OP operations like drop constraint, might produce confusing behaviour, you might
@@ -72,7 +79,7 @@ def alter_field(self, model, old_field, new_field, strict=False):
7279
return
7380
return super().alter_field(model, old_field, new_field, strict)
7481

75-
def table_sql(self, model):
82+
def table_sql(self, model) -> tuple:
7683
sql = list(super().table_sql(model))
7784

7885
partition_by = getattr(model._meta, "partition_by", OMITTED)
@@ -87,12 +94,36 @@ def table_sql(self, model):
8794
]
8895

8996
for field in partition_by:
90-
try:
91-
model._meta.get_field(field)
92-
except Exception as e:
93-
raise ValueError(
94-
f"Column {field!r} does not exist in " f"model"
95-
) from e
97+
check_field(model, field)
9698

9799
sql[0] += f" PARTITIONED BY ({", ".join(partition_by)})"
100+
101+
# --- CLUSTERED clause ---------------------------------------------------
# Both options are optional. After validation each value is either the
# OMITTED sentinel or a usable value, and the clause below is assembled
# from whichever parts are present.
clustered_by = getattr(model._meta, "clustered_by", OMITTED)
if clustered_by is not OMITTED:
    if not isinstance(clustered_by, str) or not clustered_by:
        raise ValueError(
            f"clustered_by has to be a non-empty string, not {clustered_by!r}"
        )
    # The routing column must be a field of the model.
    check_field(model, clustered_by)

number_of_shards = getattr(model._meta, "number_of_shards", OMITTED)
if number_of_shards is not OMITTED:
    # bool is a subclass of int, so reject it explicitly; zero and negative
    # shard counts are rejected as well ("bigger than 0").
    if (
        isinstance(number_of_shards, bool)
        or not isinstance(number_of_shards, int)
        or number_of_shards <= 0
    ):
        raise ValueError(
            f"number_of_shards has to be an integer bigger than 0, "
            f"not {number_of_shards!r}"
        )

# CrateDB grammar: CLUSTERED [ BY (routing_column) ] [ INTO num_shards SHARDS ]
# The shard count is written without parentheses.
if clustered_by or number_of_shards:
    clustered_clause = " CLUSTERED"
    if clustered_by:
        clustered_clause += f" BY ({clustered_by})"
    if number_of_shards:
        clustered_clause += f" INTO {number_of_shards} shards"
    sql[0] += clustered_clause
128+
98129
return tuple(sql)

tests/test_model.py

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22

33
from cratedb_django.models import CrateModel
4-
from cratedb_django.models.model import CRATE_META_OPTIONS
4+
from cratedb_django.models.model import CRATE_META_OPTIONS, OMITTED
55

66
from django.forms.models import model_to_dict
77
from django.db import connection, models
@@ -138,7 +138,7 @@ class Meta:
138138

139139

140140
def test_model_meta_partition_by():
141-
"""Test partition_by option in Meta class."""
141+
"""Test `partition_by` option in Meta class."""
142142

143143
class MetaOptions(CrateModel):
144144
one = models.TextField()
@@ -174,3 +174,74 @@ class Meta:
174174
):
175175
with connection.schema_editor() as schema_editor:
176176
schema_editor.table_sql(MetaOptions)
177+
178+
179+
def test_clustered_by():
    """
    `clustered_by` and `number_of_shards` meta class attributes.

    Walks through every combination of the two options (both set, only one
    set, none set) and then through the validation errors raised for an
    unknown column, a non-string `clustered_by` and invalid shard counts.
    """

    class MetaOptions(CrateModel):
        id = models.IntegerField()
        one = models.TextField()
        two = models.TextField()
        three = models.TextField()

        class Meta:
            app_label = "ignore"
            clustered_by = "one"
            number_of_shards = 3

    # Both options set.
    with connection.schema_editor() as schema_editor:
        sql, params = schema_editor.table_sql(MetaOptions)
        assert "CLUSTERED BY (one) INTO 3 shards" in sql

    # Only the routing column.
    MetaOptions._meta.clustered_by = "one"
    MetaOptions._meta.number_of_shards = OMITTED
    with connection.schema_editor() as schema_editor:
        sql, params = schema_editor.table_sql(MetaOptions)
        assert "CLUSTERED BY (one)" in sql
        assert "INTO 3 shards" not in sql

    # Only the shard count. Assert positively that a shard clause is emitted
    # without a routing column; a negative-only check would also pass if no
    # clause were generated at all.
    MetaOptions._meta.clustered_by = OMITTED
    MetaOptions._meta.number_of_shards = 3
    with connection.schema_editor() as schema_editor:
        sql, params = schema_editor.table_sql(MetaOptions)
        assert "CLUSTERED INTO" in sql
        assert "CLUSTERED BY" not in sql

    # Neither option set: no CLUSTERED clause at all.
    MetaOptions._meta.clustered_by = OMITTED
    MetaOptions._meta.number_of_shards = OMITTED
    with connection.schema_editor() as schema_editor:
        sql, params = schema_editor.table_sql(MetaOptions)
        assert "INTO 3 shards" not in sql
        assert "CLUSTERED" not in sql

    # Validation errors. The meta attributes are set outside the
    # `pytest.raises` bodies so only `table_sql` can satisfy the expectation.
    MetaOptions._meta.clustered_by = "nocolumn"
    MetaOptions._meta.number_of_shards = OMITTED
    with pytest.raises(ValueError, match="Column 'nocolumn' does not exist in model"):
        with connection.schema_editor() as schema_editor:
            schema_editor.table_sql(MetaOptions)

    MetaOptions._meta.clustered_by = 1
    with pytest.raises(
        ValueError, match="clustered_by has to be a non-empty string, not 1"
    ):
        with connection.schema_editor() as schema_editor:
            schema_editor.table_sql(MetaOptions)

    MetaOptions._meta.clustered_by = OMITTED
    MetaOptions._meta.number_of_shards = 0
    with pytest.raises(
        ValueError, match="number_of_shards has to be an integer bigger than 0"
    ):
        with connection.schema_editor() as schema_editor:
            schema_editor.table_sql(MetaOptions)

    MetaOptions._meta.clustered_by = OMITTED
    MetaOptions._meta.number_of_shards = "abcdef"
    with pytest.raises(
        ValueError,
        match="number_of_shards has to be an integer bigger than 0, not 'abcdef'",
    ):
        with connection.schema_editor() as schema_editor:
            schema_editor.table_sql(MetaOptions)

0 commit comments

Comments
 (0)