@@ -80,6 +80,10 @@ impl From<()> for Options {
8080}
8181
8282impl Options {
83+ /// Check if we should skip a data migration. Returns `true` if it should be skipped.
84+ ///
85+ /// Skipping means that the "data" part of the migration should not be processed. The schema
86+ /// part will still be processed.
8387 pub fn should_skip ( & self , name : & str ) -> bool {
8488 if self . skip_all {
8589 // we skip all migration
@@ -104,6 +108,7 @@ impl From<&Options> for Partition {
104108 }
105109}
106110
111+ /// A trait for processing documents
107112pub trait DocumentProcessor {
108113 fn process < D > (
109114 & self ,
@@ -116,6 +121,42 @@ pub trait DocumentProcessor {
116121}
117122
118123impl < ' c > DocumentProcessor for SchemaManager < ' c > {
124+ /// Process documents for a schema *data* migration.
125+ ///
126+ /// ## Pre-requisites
127+ ///
128+ /// The database should be in maintenance mode. Meaning that the actual application should be
129+ /// running from a read-only clone for the time of processing.
130+ ///
131+ /// ## Partitioning
132+ ///
133+ /// This will partition documents and only process documents selected for *this* partition.
134+ /// The partition configuration normally comes from outside, as configuration through env-vars.
135+ ///
136+ /// This means that there may be other instances of this processor running in a different
137+ /// process instance. However, those instances will not touch documents of our partition.
138+ ///
139+ /// ## Transaction strategy
140+ ///
141+ /// The processor will identify all documents, filtering out all which are not part of this
142+ /// partition. This is done in a dedicated transaction. As the database is supposed to be in
143+ /// read-only mode for the running instance, this is ok as no additional documents will be
144+ /// created during the time of processing.
145+ ///
146+ /// Next, it processes all found documents concurrently. Meaning, this single process
147+ /// instance will process multiple documents in parallel.
148+ ///
149+ /// Each document is loaded and processed within a dedicated transaction. The transaction is
150+ /// committed at the end of each step, before moving on to the next document.
151+ ///
152+ /// As handlers are intended to be idempotent, there's no harm in re-running them, in case
153+ /// things go wrong.
154+ ///
155+ /// ## Caveats
156+ ///
157+ /// However, this may lead to a situation where only a part of the documents is processed.
158+ /// But, this is ok, as the migration is supposed to run on a clone of the database and so the
159+ /// actual system is still running from the read-only clone of the original data.
119160 async fn process < D > (
120161 & self ,
121162 storage : & DispatchBackend ,
@@ -188,6 +229,9 @@ impl<'c> DocumentProcessor for SchemaManager<'c> {
188229}
189230
190231/// A handler for data migration of documents.
232+ ///
233+ /// Handlers have to be written in a way that they can be re-run multiple times on the same
234+ /// document without failing, always producing the exact same output state.
191235#[ macro_export]
192236macro_rules! handler {
193237 ( async | $doc: ident: $doc_ty: ty, $model: ident, $tx: ident | $body: block) => { {
0 commit comments