From bd81257c3449e0850b2865b3d8eb7269e6eb65be Mon Sep 17 00:00:00 2001 From: Balint-H <41113387+Balint-H@users.noreply.github.com> Date: Fri, 11 Feb 2022 12:31:26 +0000 Subject: [PATCH] Improve parse times A significant speedup can be achieved by splitting up the data string with a regex instead of parsing it character by character. Also, the individual string values of frame components are converted to float, reducing the memory footprint of the Bvh object. --- bvh.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/bvh.py b/bvh.py index 40fc5a2..e0fa256 100644 --- a/bvh.py +++ b/bvh.py @@ -50,21 +50,12 @@ def __init__(self, data): self.tokenize() def tokenize(self): - first_round = [] - accumulator = '' - for char in self.data: - if char not in ('\n', '\r'): - accumulator += char - elif accumulator: - first_round.append(re.split('\\s+', accumulator.strip())) - accumulator = '' + lines = re.split('\n|\r', self.data) + first_round = [re.split('\\s+', line.strip()) for line in lines[:-1]] node_stack = [self.root] - frame_time_found = False node = None - for item in first_round: - if frame_time_found: - self.frames.append(item) - continue + data_start_idx = 0 + for line, item in enumerate(first_round): key = item[0] if key == '{': node_stack.append(node) @@ -74,7 +65,9 @@ def tokenize(self): node = BvhNode(item) node_stack[-1].add_child(node) if item[0] == 'Frame' and item[1] == 'Time:': - frame_time_found = True + data_start_idx = line + break + self.frames = [[float(scalar) for scalar in line] for line in first_round[data_start_idx+1:]] def search(self, *items): found_nodes = []