From bd81257c3449e0850b2865b3d8eb7269e6eb65be Mon Sep 17 00:00:00 2001
From: Balint-H <41113387+Balint-H@users.noreply.github.com>
Date: Fri, 11 Feb 2022 12:31:26 +0000
Subject: [PATCH] Improve parse times

A significant speedup is achieved by splitting the data string with a regex instead of parsing it character by character. In addition, the individual string values of each frame are converted to float, which reduces the memory footprint of the Bvh object.
---
 bvh.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/bvh.py b/bvh.py
index 40fc5a2..e0fa256 100644
--- a/bvh.py
+++ b/bvh.py
@@ -50,21 +50,12 @@ def __init__(self, data):
         self.tokenize()
 
     def tokenize(self):
-        first_round = []
-        accumulator = ''
-        for char in self.data:
-            if char not in ('\n', '\r'):
-                accumulator += char
-            elif accumulator:
-                    first_round.append(re.split('\\s+', accumulator.strip()))
-                    accumulator = ''
+        lines = re.split('\n|\r', self.data)
+        first_round = [re.split('\\s+', line.strip()) for line in lines[:-1]]
         node_stack = [self.root]
-        frame_time_found = False
         node = None
-        for item in first_round:
-            if frame_time_found:
-                self.frames.append(item)
-                continue
+        data_start_idx = 0
+        for line, item in enumerate(first_round):
             key = item[0]
             if key == '{':
                 node_stack.append(node)
@@ -74,7 +65,9 @@ def tokenize(self):
                 node = BvhNode(item)
                 node_stack[-1].add_child(node)
             if item[0] == 'Frame' and item[1] == 'Time:':
-                frame_time_found = True
+                data_start_idx = line
+                break
+        self.frames = [[float(scalar) for scalar in line] for line in first_round[data_start_idx+1:]]
 
     def search(self, *items):
         found_nodes = []