Skip to content

Commit 973f3d5

Browse files
committed
refactor: optimize SQL parser with caching and simplified state machine
1 parent f803cc8 commit 973f3d5

File tree

1 file changed

+45
-61
lines changed

1 file changed

+45
-61
lines changed

mariadb/impl/sql_parser.py

Lines changed: 45 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -14,94 +14,78 @@
1414
Based on mariadb-connector-nodejs implementation.
1515
"""
1616

17+
from functools import lru_cache
1718
from typing import List, Tuple
1819

19-
def split_sql_parts(sql: str) -> Tuple[bytes, List[int]]:
20-
"""
21-
Find positions of positional placeholders (?) in SQL and return SQL as bytes.
22-
23-
Args:
24-
sql: SQL statement
2520

26-
Returns:
27-
Tuple of (sql_bytes, placeholder_byte_positions)
28-
- sql_bytes: SQL encoded as UTF-8 bytes
29-
- placeholder_byte_positions: List of byte positions (start, end) pairs
30-
"""
21+
@lru_cache(maxsize=256)
22+
def split_sql_parts(sql: str) -> Tuple[bytes, List[int]]:
3123
sql_bytes = sql.encode('utf-8')
24+
length = len(sql_bytes)
3225
param_positions: List[int] = []
33-
34-
NORMAL = 0
35-
STRING = 1
36-
ESCAPE = 2
37-
BACKTICK = 3
38-
EOL = 4
39-
COMMENT = 5
40-
state = NORMAL
26+
27+
state = 0 # 0=NORMAL, 1=STRING, 2=ESCAPE, 3=BACKTICK, 4=EOL, 5=COMMENT
4128
single_quotes = False
42-
43-
last_char = 0 # only used for comment detection
44-
45-
for i, c in enumerate(sql_bytes):
46-
if state == ESCAPE:
47-
# Escaped char ends escape sequence
48-
state = STRING
29+
last_char = 0
30+
i = 0
31+
32+
while i < length:
33+
c = sql_bytes[i]
34+
35+
if state == 2: # ESCAPE
36+
state = 1
4937
last_char = c
38+
i += 1
5039
continue
5140

52-
if state == NORMAL:
53-
# Use dict lookup for clarity if desired
41+
if state == 0: # NORMAL
5442
if c == 63: # '?'
5543
param_positions.append(i)
5644
param_positions.append(i + 1)
5745
elif c == 39: # "'"
58-
state = STRING
46+
state = 1
5947
single_quotes = True
6048
elif c == 34: # '"'
61-
state = STRING
49+
state = 1
6250
single_quotes = False
6351
elif c == 96: # '`'
64-
state = BACKTICK
65-
elif c == 92: # '\'
66-
pass # nothing to do in NORMAL
67-
elif c == 42: # '*'
68-
if last_char == 47: # '/*'
69-
# Check for executable comment
70-
if i + 1 < len(sql_bytes):
71-
next_c = sql_bytes[i + 1]
72-
if next_c not in (33, 77): # '!' or 'M'
73-
state = COMMENT
74-
else:
75-
state = COMMENT
52+
state = 3
53+
elif c == 42 and last_char == 47: # '/*'
54+
if i + 1 < length:
55+
next_c = sql_bytes[i + 1]
56+
if next_c not in (33, 77): # not '!' or 'M'
57+
state = 5
58+
else:
59+
state = 5
7660
elif c == 47: # '/'
77-
if last_char == 42: # end of comment '*/'
78-
state = NORMAL
79-
elif last_char == 47: # start of // comment
80-
state = EOL
61+
if last_char == 42: # '*/'
62+
state = 0
63+
elif last_char == 47: # '//'
64+
state = 4
8165
elif c == 35: # '#'
82-
state = EOL
83-
elif c == 45: # '-'
84-
if last_char == 45: # '--'
85-
state = EOL
66+
state = 4
67+
elif c == 45 and last_char == 45: # '--'
68+
state = 4
8669

87-
elif state == STRING:
70+
elif state == 1: # STRING
8871
if c == 92: # '\'
89-
state = ESCAPE
72+
state = 2
9073
elif (c == 39 and single_quotes) or (c == 34 and not single_quotes):
91-
state = NORMAL
74+
state = 0
9275

93-
elif state == BACKTICK:
94-
if c == 96: # '`'
95-
state = NORMAL
76+
elif state == 3: # BACKTICK
77+
if c == 96:
78+
state = 0
9679

97-
elif state == EOL:
80+
elif state == 4: # EOL
9881
if c == 10: # '\n'
99-
state = NORMAL
82+
state = 0
10083

101-
elif state == COMMENT:
84+
elif state == 5: # COMMENT
10285
if last_char == 42 and c == 47: # '*/'
103-
state = NORMAL
86+
state = 0
10487

10588
last_char = c
89+
i += 1
10690

107-
return sql_bytes, param_positions
91+
return sql_bytes, param_positions

0 commit comments

Comments
 (0)