Skip to content

Commit 4be516b

Browse files
committed
repozo: support incremental recover
This allows recovering a ZODB FileStorage by appending only the chunks missing from the latest recovered file, instead of always recovering from scratch.
1 parent 6543901 commit 4be516b

File tree

2 files changed

+306
-26
lines changed

2 files changed

+306
-26
lines changed

src/ZODB/scripts/repozo.py

Lines changed: 93 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@
7373
Note: for the stdout case, the index file will **not** be restored
7474
automatically.
7575
76+
-F / --full
77+
Force a full recover. By default, an incremental recover is made
78+
if possible, by only copying the latest backup delta to the recovered
79+
ZODB file. A full recover will always be done if a pack has occurred
80+
since the last incremental backup.
81+
7682
-w
7783
--with-verify
7884
Verify on the fly the backup files on recovering. This option runs
@@ -185,7 +191,7 @@ class Options:
185191
mode = None # BACKUP, RECOVER or VERIFY
186192
file = None # name of input Data.fs file
187193
repository = None # name of directory holding backups
188-
full = False # True forces full backup
194+
full = False # True forces full backup or full recovery
189195
date = None # -D argument, if any
190196
output = None # where to write recovered data; None = stdout
191197
quick = False # -Q flag state
@@ -396,9 +402,8 @@ def func(data):
396402
return bytesread, sum.hexdigest()
397403

398404

399-
def recover_repofiles(options, repofiles, outfp):
405+
def recover_repofiles(options, repofiles, datfile, outfp):
400406
if options.withverify:
401-
datfile = os.path.splitext(repofiles[0])[0] + '.dat'
402407
with open(datfile) as fp:
403408
truth_dict = {}
404409
for line in fp:
@@ -709,15 +714,7 @@ def do_backup(options):
709714
do_full_backup(options)
710715

711716

712-
def do_recover(options):
713-
# Find the first full backup at or before the specified date
714-
repofiles = find_files(options)
715-
if not repofiles:
716-
if options.date:
717-
raise NoFiles(f'No files in repository before {options.date}')
718-
else:
719-
raise NoFiles('No files in repository')
720-
717+
def do_full_recover(options, repofiles):
721718
files_to_close = ()
722719
if options.output is None:
723720
log('Recovering file to stdout')
@@ -734,17 +731,8 @@ def do_recover(options):
734731
files_to_close += (outfp,)
735732

736733
try:
737-
recover_repofiles(options, repofiles, outfp)
738-
if options.output is not None:
739-
last_base = os.path.splitext(repofiles[-1])[0]
740-
source_index = '%s.index' % last_base
741-
target_index = '%s.index' % options.output
742-
if os.path.exists(source_index):
743-
log('Restoring index file %s to %s',
744-
source_index, target_index)
745-
shutil.copyfile(source_index, target_index)
746-
else:
747-
log('No index file to restore: %s', source_index)
734+
datfile = os.path.splitext(repofiles[0])[0] + '.dat'
735+
recover_repofiles(options, repofiles, datfile, outfp)
748736
finally:
749737
for f in files_to_close:
750738
f.close()
@@ -758,6 +746,88 @@ def do_recover(options):
758746
raise
759747

760748

749+
def do_incremental_recover(options, repofiles):
    """Update an existing recovered file by writing only the missing chunks.

    The ``.dat`` file next to the full backup (``repofiles[0]``) is read
    line by line.  Each line is split into four whitespace-separated
    fields: a backup file name, the start and end offsets of that chunk,
    and a checksum.  The first chunk ending past the current end of
    ``options.output`` is where recovery restarts; the chunk just before
    it is checksummed against the target file to make sure both still
    share the same history.

    Falls back to ``do_full_recover`` when:

    * the ``.dat`` file lists no chunk at all,
    * the target file is longer than the latest backup,
    * the target file is shorter than the first (full backup) chunk,
    * the checksum of the last whole common chunk does not match.

    Does nothing when the target file already ends exactly where the
    latest backup chunk ends.

    :param options: parsed options; ``output`` (an existing file) and
        ``repository`` are read here.
    :param repofiles: backup files, the full backup first.
    :return: ``None``, or whatever ``do_full_recover`` returns on fallback.
    """
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    log('Recovering (incrementally) file to %s', options.output)

    # The fallback is only *decided* inside the ``with`` block and carried
    # out after it, so the handles on the .dat file and on the target file
    # are released before do_full_recover() touches the target.
    fallback_reason = None
    with open(datfile) as fp, open(options.output, 'r+b') as outfp:
        outfp.seek(0, 2)  # whence=2: position at end of file
        initial_length = outfp.tell()
        # Pre-bind the loop variables so that an empty .dat file is
        # detected below instead of raising NameError.
        fn = startpos = endpos = None
        previous_chunk = None
        for line in fp:
            fn, startpos, endpos, _ = chunk = line.split()
            startpos = int(startpos)
            endpos = int(endpos)
            if endpos > initial_length:
                # First chunk reaching past the end of the target file:
                # recovery restarts at its startpos.
                break
            previous_chunk = chunk
        else:
            # Every listed chunk (if any) ends at or before the target's
            # current end.
            if endpos is None:
                fallback_reason = ('Backup .dat file lists no chunk, '
                                   'falling back to a full recover.')
            elif endpos == initial_length:
                log('Target file is same size as latest backup, '
                    'doing nothing.')
                return
            else:
                fallback_reason = ('Target file is longer than latest '
                                   'backup, falling back to a full recover.')

        if fallback_reason is None and previous_chunk is None:
            # The loop broke on the very first chunk.
            fallback_reason = ('Target file shorter than full backup, '
                               'falling back to a full recover.')

        if fallback_reason is None:
            check_startpos = int(previous_chunk[1])
            check_endpos = int(previous_chunk[2])
            outfp.seek(check_startpos)
            if previous_chunk[3] != checksum(outfp,
                                             check_endpos - check_startpos):
                fallback_reason = (
                    'Last whole common chunk checksum did not match with '
                    'backup, falling back to a full recover.')
            else:
                # Consistency check: after checksumming the previous chunk
                # the file position must be exactly where the next chunk
                # starts, i.e. chunks are expected to be contiguous.
                assert outfp.tell() == startpos, (outfp.tell(), startpos)

                if startpos < initial_length:
                    # Nothing is truncated explicitly here; the bytes past
                    # startpos are presumably overwritten by the recovery
                    # below, whose first chunk ends beyond initial_length.
                    log('Truncating target file %i bytes before its end',
                        initial_length - startpos)

    if fallback_reason is not None:
        log(fallback_reason)
        return do_full_recover(options, repofiles)

    filename = os.path.join(options.repository,
                            os.path.basename(fn))
    first_file_to_restore = repofiles.index(filename)
    # Index 0 is the full backup; reaching it here would contradict the
    # "shorter than full backup" fallback above.
    assert first_file_to_restore > 0, (
        first_file_to_restore, options.repository, fn, filename, repofiles)

    # Work on a '.part' name while writing, so that an interrupted recovery
    # does not leave a half-written file under the final name.
    temporary_output_file = options.output + '.part'
    os.rename(options.output, temporary_output_file)
    with open(temporary_output_file, 'r+b') as outfp:
        outfp.seek(startpos)
        recover_repofiles(options,
                          repofiles[first_file_to_restore:],
                          datfile,
                          outfp)
    os.rename(temporary_output_file, options.output)
803+
804+
805+
def do_recover(options):
    """Recover a file from the repository, incrementally when possible.

    A full recover is done when ``options.full`` is set, when recovering
    to stdout (``options.output`` is ``None``), or when the output file
    does not exist yet.  Otherwise an incremental recover is attempted on
    the existing output file.  When recovering to a file, the index file
    of the last backup is copied next to it if that index file exists.

    :param options: parsed options; ``full``, ``output`` and ``date`` are
        read here.
    :raises NoFiles: if ``find_files`` returns no backup file.
    """
    # Find the first full backup at or before the specified date
    repofiles = find_files(options)
    if not repofiles:
        if options.date:
            raise NoFiles(f'No files in repository before {options.date}')
        else:
            raise NoFiles('No files in repository')

    # ``options.output is None`` means stdout: there is no existing file to
    # extend, and os.path.exists(None) would raise TypeError, so this case
    # must be routed to the full recover before the existence test.
    if (options.full
            or options.output is None
            or not os.path.exists(options.output)):
        do_full_recover(options, repofiles)
    else:
        do_incremental_recover(options, repofiles)

    if options.output is not None:
        last_base = os.path.splitext(repofiles[-1])[0]
        source_index = '%s.index' % last_base
        target_index = '%s.index' % options.output
        if os.path.exists(source_index):
            log('Restoring index file %s to %s',
                source_index, target_index)
            shutil.copyfile(source_index, target_index)
        else:
            log('No index file to restore: %s', source_index)
829+
830+
761831
def do_verify(options):
762832
# Verify the sizes and checksums of all files mentioned in the .dat file
763833
repofiles = find_files(options)

0 commit comments

Comments
 (0)