crate · surister · Nov 10, 2025 · Nov 6, 2025 · Nov 10, 2025 · amotl
diff --git a/cratedb_django/models/model.py b/cratedb_django/models/model.py
@@ -2,14 +2,17 @@
 from django.db.models.base import ModelBase
 
 # If a meta option has the value OMITTED, it will be omitted
-# from SQL creation.
-OMITTED = object()
+# from SQL creation. bool(OMITTED) evaluates to False.
+_OMITTED = type("OMITTED", (), {"__bool__": lambda _: False})
+OMITTED = _OMITTED()
 
 # dict of all the extra options a CrateModel Meta class has.
 # (name, default_value)
 CRATE_META_OPTIONS = {
     "auto_refresh": False,  # Automatically refresh a table on inserts.
     "partition_by": OMITTED,
+    "clustered_by": OMITTED,  # Column used in the CLUSTERED BY clause.
+    "number_of_shards": OMITTED,  # Shard count for CLUSTERED ... INTO.
 }
 
 
@@ -49,7 +52,7 @@ class CrateModel(models.Model, metaclass=MetaCrate):
     def save(self, *args, **kwargs):
         super().save(*args, **kwargs)  # perform the actual save (insert or update)
         auto_refresh = getattr(self._meta, "auto_refresh", False)
-        if auto_refresh and self.pk:  # If self.pk is available, its an insert.
+        if auto_refresh and self.pk:  # If self.pk is available, it's an insert.
             table_name = self._meta.db_table
             with connection.cursor() as cursor:
                 cursor.execute(f"refresh table {table_name}")

diff --git a/cratedb_django/schema.py b/cratedb_django/schema.py
@@ -7,6 +7,13 @@
 from cratedb_django.models.model import OMITTED
 
 
+def check_field(model, field_name: str) -> None:  # Raises if field is unknown.
+    try:
+        model._meta.get_field(field_name)  # Lookup only; the field is discarded.
+    except Exception as e:  # Any lookup failure is reported as a ValueError.
+        raise ValueError(f"Column {field_name!r} does not exist in model") from e
+
+
 class DatabaseSchemaEditor(BaseDatabaseSchemaEditor):
     # TODO DOCUMENT CAVEAT: IF YOU START WITH A DJANGO MIGRATIONS CREATED BY OTHER DATABASE LIKE POSTGRES,
     # NEW MIGRATIONS WITH NO-OP operations like drop constraint, might produce confusing behaviour, you might
@@ -72,7 +79,7 @@ def alter_field(self, model, old_field, new_field, strict=False):
             return
         return super().alter_field(model, old_field, new_field, strict)
 
-    def table_sql(self, model):
+    def table_sql(self, model) -> tuple:
         sql = list(super().table_sql(model))
 
         partition_by = getattr(model._meta, "partition_by", OMITTED)
@@ -87,12 +94,41 @@ def table_sql(self, model):
                 ]
 
             for field in partition_by:
-                try:
-                    model._meta.get_field(field)
-                except Exception as e:
-                    raise ValueError(
-                        f"Column {field!r} does not exist in " f"model"
-                    ) from e
+                check_field(model, field)
 
             sql[0] += f" PARTITIONED BY ({", ".join(partition_by)})"
+
+        # CREATE TABLE: CLUSTERED [BY (routing_column)] [INTO num_shards SHARDS]
+        clustered_by = getattr(model._meta, "clustered_by", OMITTED)
+        if clustered_by is not OMITTED:
+            if not isinstance(clustered_by, str) or not clustered_by:
+                raise ValueError(
+                    "clustered_by has to be a non-empty "
+                    f"string, not {clustered_by!r}"
+                )
+            check_field(model, clustered_by)
+
+        number_of_shards = getattr(model._meta, "number_of_shards", OMITTED)
+        if number_of_shards is not OMITTED:
+            # bool is a subclass of int, so reject it explicitly; negative
+            # values are as invalid as zero.
+            if (
+                isinstance(number_of_shards, bool)
+                or not isinstance(number_of_shards, int)
+                or number_of_shards < 1
+            ):
+                raise ValueError(
+                    "number_of_shards has to be an integer "
+                    f"bigger than 0, not {number_of_shards!r}"
+                )
+
+        # Both parts are optional and independent; emit the keyword only once.
+        clustering = []
+        if clustered_by is not OMITTED:
+            clustering.append(f"BY ({clustered_by})")
+        if number_of_shards is not OMITTED:
+            clustering.append(f"INTO {number_of_shards} shards")
+        if clustering:
+            sql[0] += f" CLUSTERED {' '.join(clustering)}"
+
         return tuple(sql)
diff --git a/tests/test_model.py b/tests/test_model.py
@@ -1,7 +1,7 @@
 import pytest
 
 from cratedb_django.models import CrateModel
-from cratedb_django.models.model import CRATE_META_OPTIONS
+from cratedb_django.models.model import CRATE_META_OPTIONS, OMITTED
 
 from django.forms.models import model_to_dict
 from django.db import connection, models
@@ -212,3 +212,84 @@ class Meta:
             SomeModel, SomeModel._meta.get_field("id")
         )
         assert sql == "text NOT NULL PRIMARY KEY"
+
+
+def test_clustered_by():
+    """
+    `clustered_by` and `number_of_shards` meta class attributes.
+    """
+
+    class MetaOptions(CrateModel):
+        id = models.IntegerField()
+        one = models.TextField()
+        two = models.TextField()
+        three = models.TextField()
+
+        class Meta:
+            app_label = "ignore"
+            clustered_by = "one"
+            number_of_shards = 3
+
+    with connection.schema_editor() as schema_editor:
+        sql, params = schema_editor.table_sql(MetaOptions)
+        assert "CLUSTERED BY (one) INTO 3 shards" in sql
+
+    MetaOptions._meta.clustered_by = "one"
+    MetaOptions._meta.number_of_shards = OMITTED
+    with connection.schema_editor() as schema_editor:
+        sql, params = schema_editor.table_sql(MetaOptions)
+        assert "CLUSTERED BY (one)" in sql
+        assert "INTO 3 shards" not in sql
+
+    MetaOptions._meta.clustered_by = OMITTED
+    MetaOptions._meta.number_of_shards = 3
+    with connection.schema_editor() as schema_editor:
+        sql, params = schema_editor.table_sql(MetaOptions)
+        assert "CLUSTERED INTO 3 shards" in sql
+        assert "CLUSTERED BY" not in sql
+
+    MetaOptions._meta.clustered_by = OMITTED
+    MetaOptions._meta.number_of_shards = OMITTED
+    with connection.schema_editor() as schema_editor:
+        sql, params = schema_editor.table_sql(MetaOptions)
+        assert "INTO 3 shards" not in sql
+        assert "CLUSTERED" not in sql
+
+    with pytest.raises(ValueError, match="Column 'nocolumn' does not exist in model"):
+        MetaOptions._meta.clustered_by = "nocolumn"
+        MetaOptions._meta.number_of_shards = OMITTED
+        with connection.schema_editor() as schema_editor:
+            schema_editor.table_sql(MetaOptions)
+
+    with pytest.raises(
+        ValueError, match="clustered_by has to be a non-empty string, not 1"
+    ):
+        MetaOptions._meta.clustered_by = 1
+        with connection.schema_editor() as schema_editor:
+            schema_editor.table_sql(MetaOptions)
+
+    with pytest.raises(
+        ValueError, match="number_of_shards has to be an integer bigger than 0"
+    ):
+        MetaOptions._meta.clustered_by = OMITTED
+        MetaOptions._meta.number_of_shards = 0
+        with connection.schema_editor() as schema_editor:
+            schema_editor.table_sql(MetaOptions)
+
+    with pytest.raises(
+        ValueError,
+        match="number_of_shards has to be an integer bigger than 0, not -1",
+    ):
+        MetaOptions._meta.clustered_by = OMITTED
+        MetaOptions._meta.number_of_shards = -1
+        with connection.schema_editor() as schema_editor:
+            schema_editor.table_sql(MetaOptions)
+
+    with pytest.raises(
+        ValueError,
+        match="number_of_shards has to be an integer bigger than 0, " "not 'abcdef'",
+    ):
+        MetaOptions._meta.clustered_by = OMITTED
+        MetaOptions._meta.number_of_shards = "abcdef"
+        with connection.schema_editor() as schema_editor:
+            schema_editor.table_sql(MetaOptions)